LLVM  17.0.0git
ARMISelLowering.cpp
Go to the documentation of this file.
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
24 #include "ARMTargetTransformInfo.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Twine.h"
65 #include "llvm/IR/Attributes.h"
66 #include "llvm/IR/CallingConv.h"
67 #include "llvm/IR/Constant.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DataLayout.h"
70 #include "llvm/IR/DebugLoc.h"
71 #include "llvm/IR/DerivedTypes.h"
72 #include "llvm/IR/Function.h"
73 #include "llvm/IR/GlobalAlias.h"
74 #include "llvm/IR/GlobalValue.h"
75 #include "llvm/IR/GlobalVariable.h"
76 #include "llvm/IR/IRBuilder.h"
77 #include "llvm/IR/InlineAsm.h"
78 #include "llvm/IR/Instruction.h"
79 #include "llvm/IR/Instructions.h"
80 #include "llvm/IR/IntrinsicInst.h"
81 #include "llvm/IR/Intrinsics.h"
82 #include "llvm/IR/IntrinsicsARM.h"
83 #include "llvm/IR/Module.h"
84 #include "llvm/IR/PatternMatch.h"
85 #include "llvm/IR/Type.h"
86 #include "llvm/IR/User.h"
87 #include "llvm/IR/Value.h"
88 #include "llvm/MC/MCInstrDesc.h"
90 #include "llvm/MC/MCRegisterInfo.h"
91 #include "llvm/MC/MCSchedule.h"
94 #include "llvm/Support/Casting.h"
95 #include "llvm/Support/CodeGen.h"
97 #include "llvm/Support/Compiler.h"
98 #include "llvm/Support/Debug.h"
100 #include "llvm/Support/KnownBits.h"
102 #include "llvm/Support/MathExtras.h"
107 #include <algorithm>
108 #include <cassert>
109 #include <cstdint>
110 #include <cstdlib>
111 #include <iterator>
112 #include <limits>
113 #include <optional>
114 #include <string>
115 #include <tuple>
116 #include <utility>
117 #include <vector>
118 
119 using namespace llvm;
120 using namespace llvm::PatternMatch;
121 
122 #define DEBUG_TYPE "arm-isel"
123 
124 STATISTIC(NumTailCalls, "Number of tail calls");
125 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127 STATISTIC(NumConstpoolPromoted,
128  "Number of constants with their storage promoted into constant pools");
129 
130 static cl::opt<bool>
131 ARMInterworking("arm-interworking", cl::Hidden,
132  cl::desc("Enable / disable ARM interworking (for debugging only)"),
133  cl::init(true));
134 
136  "arm-promote-constant", cl::Hidden,
137  cl::desc("Enable / disable promotion of unnamed_addr constants into "
138  "constant pools"),
139  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
141  "arm-promote-constant-max-size", cl::Hidden,
142  cl::desc("Maximum size of constant to promote into a constant pool"),
143  cl::init(64));
145  "arm-promote-constant-max-total", cl::Hidden,
146  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147  cl::init(128));
148 
150 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151  cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152  cl::init(2));
153 
// The APCS parameter registers.
// The first four integer/pointer arguments are passed in R0-R3; anything
// beyond these registers goes on the stack.
static const MCPhysReg GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
158 
159 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
160  if (VT != PromotedLdStVT) {
161  setOperationAction(ISD::LOAD, VT, Promote);
162  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
163 
164  setOperationAction(ISD::STORE, VT, Promote);
165  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
166  }
167 
168  MVT ElemTy = VT.getVectorElementType();
169  if (ElemTy != MVT::f64)
170  setOperationAction(ISD::SETCC, VT, Custom);
171  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
172  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
173  if (ElemTy == MVT::i32) {
174  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
175  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
176  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
177  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
178  } else {
179  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
180  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
181  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
182  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
183  }
184  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
185  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
186  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
187  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
188  setOperationAction(ISD::SELECT, VT, Expand);
189  setOperationAction(ISD::SELECT_CC, VT, Expand);
190  setOperationAction(ISD::VSELECT, VT, Expand);
191  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
192  if (VT.isInteger()) {
193  setOperationAction(ISD::SHL, VT, Custom);
194  setOperationAction(ISD::SRA, VT, Custom);
195  setOperationAction(ISD::SRL, VT, Custom);
196  }
197 
198  // Neon does not support vector divide/remainder operations.
199  setOperationAction(ISD::SDIV, VT, Expand);
200  setOperationAction(ISD::UDIV, VT, Expand);
201  setOperationAction(ISD::FDIV, VT, Expand);
202  setOperationAction(ISD::SREM, VT, Expand);
203  setOperationAction(ISD::UREM, VT, Expand);
204  setOperationAction(ISD::FREM, VT, Expand);
205  setOperationAction(ISD::SDIVREM, VT, Expand);
206  setOperationAction(ISD::UDIVREM, VT, Expand);
207 
208  if (!VT.isFloatingPoint() &&
209  VT != MVT::v2i64 && VT != MVT::v1i64)
210  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211  setOperationAction(Opcode, VT, Legal);
212  if (!VT.isFloatingPoint())
213  for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
214  setOperationAction(Opcode, VT, Legal);
215 }
216 
// Register a 64-bit NEON vector type: it lives in the D registers, and its
// loads/stores are promoted to f64.
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64);
}
221 
// Register a 128-bit NEON vector type: it lives in D-register pairs, and its
// loads/stores are promoted to v2f64.
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPairRegClass);
  addTypeForNEON(VT, MVT::v2f64);
}
226 
227 void ARMTargetLowering::setAllExpand(MVT VT) {
228  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
229  setOperationAction(Opc, VT, Expand);
230 
231  // We support these really simple operations even on types where all
232  // the actual arithmetic has to be broken down into simpler
233  // operations or turned into library calls.
234  setOperationAction(ISD::BITCAST, VT, Legal);
235  setOperationAction(ISD::LOAD, VT, Legal);
236  setOperationAction(ISD::STORE, VT, Legal);
237  setOperationAction(ISD::UNDEF, VT, Legal);
238 }
239 
240 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
241  LegalizeAction Action) {
242  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
243  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
244  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
245 }
246 
247 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
248  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
249 
250  for (auto VT : IntTypes) {
251  addRegisterClass(VT, &ARM::MQPRRegClass);
252  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
253  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
254  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
255  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
256  setOperationAction(ISD::SHL, VT, Custom);
257  setOperationAction(ISD::SRA, VT, Custom);
258  setOperationAction(ISD::SRL, VT, Custom);
259  setOperationAction(ISD::SMIN, VT, Legal);
260  setOperationAction(ISD::SMAX, VT, Legal);
261  setOperationAction(ISD::UMIN, VT, Legal);
262  setOperationAction(ISD::UMAX, VT, Legal);
263  setOperationAction(ISD::ABS, VT, Legal);
264  setOperationAction(ISD::SETCC, VT, Custom);
265  setOperationAction(ISD::MLOAD, VT, Custom);
266  setOperationAction(ISD::MSTORE, VT, Legal);
267  setOperationAction(ISD::CTLZ, VT, Legal);
268  setOperationAction(ISD::CTTZ, VT, Custom);
269  setOperationAction(ISD::BITREVERSE, VT, Legal);
270  setOperationAction(ISD::BSWAP, VT, Legal);
271  setOperationAction(ISD::SADDSAT, VT, Legal);
272  setOperationAction(ISD::UADDSAT, VT, Legal);
273  setOperationAction(ISD::SSUBSAT, VT, Legal);
274  setOperationAction(ISD::USUBSAT, VT, Legal);
275  setOperationAction(ISD::ABDS, VT, Legal);
276  setOperationAction(ISD::ABDU, VT, Legal);
277  setOperationAction(ISD::AVGFLOORS, VT, Legal);
278  setOperationAction(ISD::AVGFLOORU, VT, Legal);
279  setOperationAction(ISD::AVGCEILS, VT, Legal);
280  setOperationAction(ISD::AVGCEILU, VT, Legal);
281 
282  // No native support for these.
283  setOperationAction(ISD::UDIV, VT, Expand);
284  setOperationAction(ISD::SDIV, VT, Expand);
285  setOperationAction(ISD::UREM, VT, Expand);
286  setOperationAction(ISD::SREM, VT, Expand);
287  setOperationAction(ISD::UDIVREM, VT, Expand);
288  setOperationAction(ISD::SDIVREM, VT, Expand);
289  setOperationAction(ISD::CTPOP, VT, Expand);
290  setOperationAction(ISD::SELECT, VT, Expand);
291  setOperationAction(ISD::SELECT_CC, VT, Expand);
292 
293  // Vector reductions
294  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
295  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
296  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
297  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
298  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
299  setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
300  setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
301  setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
302  setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
303 
304  if (!HasMVEFP) {
305  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
306  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
307  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
308  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
309  } else {
310  setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
311  setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
312  }
313 
314  // Pre and Post inc are supported on loads and stores
315  for (unsigned im = (unsigned)ISD::PRE_INC;
317  setIndexedLoadAction(im, VT, Legal);
318  setIndexedStoreAction(im, VT, Legal);
319  setIndexedMaskedLoadAction(im, VT, Legal);
320  setIndexedMaskedStoreAction(im, VT, Legal);
321  }
322  }
323 
324  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325  for (auto VT : FloatTypes) {
326  addRegisterClass(VT, &ARM::MQPRRegClass);
327  if (!HasMVEFP)
328  setAllExpand(VT);
329 
330  // These are legal or custom whether we have MVE.fp or not
331  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
332  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
333  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
334  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
335  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
336  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
337  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
338  setOperationAction(ISD::SETCC, VT, Custom);
339  setOperationAction(ISD::MLOAD, VT, Custom);
340  setOperationAction(ISD::MSTORE, VT, Legal);
341  setOperationAction(ISD::SELECT, VT, Expand);
342  setOperationAction(ISD::SELECT_CC, VT, Expand);
343 
344  // Pre and Post inc are supported on loads and stores
345  for (unsigned im = (unsigned)ISD::PRE_INC;
347  setIndexedLoadAction(im, VT, Legal);
348  setIndexedStoreAction(im, VT, Legal);
349  setIndexedMaskedLoadAction(im, VT, Legal);
350  setIndexedMaskedStoreAction(im, VT, Legal);
351  }
352 
353  if (HasMVEFP) {
354  setOperationAction(ISD::FMINNUM, VT, Legal);
355  setOperationAction(ISD::FMAXNUM, VT, Legal);
356  setOperationAction(ISD::FROUND, VT, Legal);
357  setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
358  setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
359  setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
360  setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
361 
362  // No native support for these.
363  setOperationAction(ISD::FDIV, VT, Expand);
364  setOperationAction(ISD::FREM, VT, Expand);
365  setOperationAction(ISD::FSQRT, VT, Expand);
366  setOperationAction(ISD::FSIN, VT, Expand);
367  setOperationAction(ISD::FCOS, VT, Expand);
368  setOperationAction(ISD::FPOW, VT, Expand);
369  setOperationAction(ISD::FLOG, VT, Expand);
370  setOperationAction(ISD::FLOG2, VT, Expand);
371  setOperationAction(ISD::FLOG10, VT, Expand);
372  setOperationAction(ISD::FEXP, VT, Expand);
373  setOperationAction(ISD::FEXP2, VT, Expand);
374  setOperationAction(ISD::FNEARBYINT, VT, Expand);
375  }
376  }
377 
378  // Custom Expand smaller than legal vector reductions to prevent false zero
379  // items being added.
380  setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
381  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
382  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
383  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
384  setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
385  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
386  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
387  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
388 
389  // We 'support' these types up to bitcast/load/store level, regardless of
390  // MVE integer-only / float support. Only doing FP data processing on the FP
391  // vector types is inhibited at integer-only level.
392  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
393  for (auto VT : LongTypes) {
394  addRegisterClass(VT, &ARM::MQPRRegClass);
395  setAllExpand(VT);
396  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
398  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
399  setOperationAction(ISD::VSELECT, VT, Legal);
400  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
401  }
402  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
403 
404  // We can do bitwise operations on v2i64 vectors
405  setOperationAction(ISD::AND, MVT::v2i64, Legal);
406  setOperationAction(ISD::OR, MVT::v2i64, Legal);
407  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
408 
409  // It is legal to extload from v4i8 to v4i16 or v4i32.
410  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
411  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
412  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
413 
414  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
415  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
416  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
417  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
418  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
419  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
420 
421  // Some truncating stores are legal too.
422  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
423  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
424  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
425 
426  // Pre and Post inc on these are legal, given the correct extends
427  for (unsigned im = (unsigned)ISD::PRE_INC;
429  for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
430  setIndexedLoadAction(im, VT, Legal);
431  setIndexedStoreAction(im, VT, Legal);
432  setIndexedMaskedLoadAction(im, VT, Legal);
433  setIndexedMaskedStoreAction(im, VT, Legal);
434  }
435  }
436 
437  // Predicate types
438  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
439  for (auto VT : pTypes) {
440  addRegisterClass(VT, &ARM::VCCRRegClass);
441  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
442  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
443  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
444  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
445  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
446  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
447  setOperationAction(ISD::SETCC, VT, Custom);
448  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
449  setOperationAction(ISD::LOAD, VT, Custom);
450  setOperationAction(ISD::STORE, VT, Custom);
451  setOperationAction(ISD::TRUNCATE, VT, Custom);
452  setOperationAction(ISD::VSELECT, VT, Expand);
453  setOperationAction(ISD::SELECT, VT, Expand);
454  setOperationAction(ISD::SELECT_CC, VT, Expand);
455 
456  if (!HasMVEFP) {
457  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
458  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
459  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
460  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
461  }
462  }
463  setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
464  setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
465  setOperationAction(ISD::AND, MVT::v2i1, Expand);
466  setOperationAction(ISD::OR, MVT::v2i1, Expand);
467  setOperationAction(ISD::XOR, MVT::v2i1, Expand);
468  setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
469  setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
470  setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
471  setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
472 
473  setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
474  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
475  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
476  setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
477  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
478  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
479  setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
480  setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
481 }
482 
484  const ARMSubtarget &STI)
485  : TargetLowering(TM), Subtarget(&STI) {
486  RegInfo = Subtarget->getRegisterInfo();
487  Itins = Subtarget->getInstrItineraryData();
488 
491 
492  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
493  !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
494  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
495  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
496  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
497  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
499  }
500 
501  if (Subtarget->isTargetMachO()) {
502  // Uses VFP for Thumb libfuncs if available.
503  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
504  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
505  static const struct {
506  const RTLIB::Libcall Op;
507  const char * const Name;
508  const ISD::CondCode Cond;
509  } LibraryCalls[] = {
510  // Single-precision floating-point arithmetic.
511  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
512  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
513  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
514  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
515 
516  // Double-precision floating-point arithmetic.
517  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
518  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
519  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
520  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
521 
522  // Single-precision comparisons.
523  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
524  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
525  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
526  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
527  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
528  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
529  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
530 
531  // Double-precision comparisons.
532  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
533  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
534  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
535  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
536  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
537  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
538  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
539 
540  // Floating-point to integer conversions.
541  // i64 conversions are done via library routines even when generating VFP
542  // instructions, so use the same ones.
543  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
544  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
545  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
546  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
547 
548  // Conversions between floating types.
549  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
550  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
551 
552  // Integer to floating-point conversions.
553  // i64 conversions are done via library routines even when generating VFP
554  // instructions, so use the same ones.
555  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
556  // e.g., __floatunsidf vs. __floatunssidfvfp.
557  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
558  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
559  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
560  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
561  };
562 
563  for (const auto &LC : LibraryCalls) {
564  setLibcallName(LC.Op, LC.Name);
565  if (LC.Cond != ISD::SETCC_INVALID)
566  setCmpLibcallCC(LC.Op, LC.Cond);
567  }
568  }
569  }
570 
571  // These libcalls are not available in 32-bit.
572  setLibcallName(RTLIB::SHL_I128, nullptr);
573  setLibcallName(RTLIB::SRL_I128, nullptr);
574  setLibcallName(RTLIB::SRA_I128, nullptr);
575  setLibcallName(RTLIB::MUL_I128, nullptr);
576  setLibcallName(RTLIB::MULO_I64, nullptr);
577  setLibcallName(RTLIB::MULO_I128, nullptr);
578 
579  // RTLIB
580  if (Subtarget->isAAPCS_ABI() &&
581  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
582  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
583  static const struct {
584  const RTLIB::Libcall Op;
585  const char * const Name;
586  const CallingConv::ID CC;
587  const ISD::CondCode Cond;
588  } LibraryCalls[] = {
589  // Double-precision floating-point arithmetic helper functions
590  // RTABI chapter 4.1.2, Table 2
591  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
592  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
594  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595 
596  // Double-precision floating-point comparison helper functions
597  // RTABI chapter 4.1.2, Table 3
598  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
599  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
600  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
601  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
602  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
603  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
604  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
605 
606  // Single-precision floating-point arithmetic helper functions
607  // RTABI chapter 4.1.2, Table 4
608  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
609  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 
613  // Single-precision floating-point comparison helper functions
614  // RTABI chapter 4.1.2, Table 5
615  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
616  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
617  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
618  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
619  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
620  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
621  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
622 
623  // Floating-point to integer conversions.
624  // RTABI chapter 4.1.2, Table 6
625  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 
634  // Conversions between floating types.
635  // RTABI chapter 4.1.2, Table 7
636  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639 
640  // Integer to floating-point conversions.
641  // RTABI chapter 4.1.2, Table 8
642  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 
651  // Long long helper functions
652  // RTABI chapter 4.2, Table 9
653  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657 
658  // Integer division functions
659  // RTABI chapter 4.3.1
660  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668  };
669 
670  for (const auto &LC : LibraryCalls) {
671  setLibcallName(LC.Op, LC.Name);
672  setLibcallCallingConv(LC.Op, LC.CC);
673  if (LC.Cond != ISD::SETCC_INVALID)
674  setCmpLibcallCC(LC.Op, LC.Cond);
675  }
676 
677  // EABI dependent RTLIB
678  if (TM.Options.EABIVersion == EABI::EABI4 ||
679  TM.Options.EABIVersion == EABI::EABI5) {
680  static const struct {
681  const RTLIB::Libcall Op;
682  const char *const Name;
683  const CallingConv::ID CC;
684  const ISD::CondCode Cond;
685  } MemOpsLibraryCalls[] = {
686  // Memory operations
687  // RTABI chapter 4.3.4
689  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
690  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
691  };
692 
693  for (const auto &LC : MemOpsLibraryCalls) {
694  setLibcallName(LC.Op, LC.Name);
695  setLibcallCallingConv(LC.Op, LC.CC);
696  if (LC.Cond != ISD::SETCC_INVALID)
697  setCmpLibcallCC(LC.Op, LC.Cond);
698  }
699  }
700  }
701 
702  if (Subtarget->isTargetWindows()) {
703  static const struct {
704  const RTLIB::Libcall Op;
705  const char * const Name;
706  const CallingConv::ID CC;
707  } LibraryCalls[] = {
708  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
709  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
710  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
711  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
712  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
713  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
714  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
715  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
716  };
717 
718  for (const auto &LC : LibraryCalls) {
719  setLibcallName(LC.Op, LC.Name);
720  setLibcallCallingConv(LC.Op, LC.CC);
721  }
722  }
723 
724  // Use divmod compiler-rt calls for iOS 5.0 and later.
725  if (Subtarget->isTargetMachO() &&
726  !(Subtarget->isTargetIOS() &&
727  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
728  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
729  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
730  }
731 
732  // The half <-> float conversion functions are always soft-float on
733  // non-watchos platforms, but are needed for some targets which use a
734  // hard-float calling convention by default.
735  if (!Subtarget->isTargetWatchABI()) {
736  if (Subtarget->isAAPCS_ABI()) {
737  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
738  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
739  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
740  } else {
741  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
742  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
743  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
744  }
745  }
746 
747  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
748  // a __gnu_ prefix (which is the default).
749  if (Subtarget->isTargetAEABI()) {
750  static const struct {
751  const RTLIB::Libcall Op;
752  const char * const Name;
753  const CallingConv::ID CC;
754  } LibraryCalls[] = {
755  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
756  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
757  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
758  };
759 
760  for (const auto &LC : LibraryCalls) {
761  setLibcallName(LC.Op, LC.Name);
762  setLibcallCallingConv(LC.Op, LC.CC);
763  }
764  }
765 
766  if (Subtarget->isThumb1Only())
767  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
768  else
769  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
770 
771  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
772  Subtarget->hasFPRegs()) {
773  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
774  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
775 
780 
781  if (!Subtarget->hasVFP2Base())
782  setAllExpand(MVT::f32);
783  if (!Subtarget->hasFP64())
784  setAllExpand(MVT::f64);
785  }
786 
787  if (Subtarget->hasFullFP16()) {
788  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
791 
794  }
795 
796  if (Subtarget->hasBF16()) {
797  addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
798  setAllExpand(MVT::bf16);
799  if (!Subtarget->hasFullFP16())
801  }
802 
803  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
804  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
805  setTruncStoreAction(VT, InnerVT, Expand);
806  addAllExtLoads(VT, InnerVT, Expand);
807  }
808 
811 
813  }
814 
817 
820 
821  if (Subtarget->hasMVEIntegerOps())
822  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
823 
824  // Combine low-overhead loop intrinsics so that we can lower i1 types.
825  if (Subtarget->hasLOB()) {
827  }
828 
829  if (Subtarget->hasNEON()) {
830  addDRTypeForNEON(MVT::v2f32);
831  addDRTypeForNEON(MVT::v8i8);
832  addDRTypeForNEON(MVT::v4i16);
833  addDRTypeForNEON(MVT::v2i32);
834  addDRTypeForNEON(MVT::v1i64);
835 
836  addQRTypeForNEON(MVT::v4f32);
837  addQRTypeForNEON(MVT::v2f64);
838  addQRTypeForNEON(MVT::v16i8);
839  addQRTypeForNEON(MVT::v8i16);
840  addQRTypeForNEON(MVT::v4i32);
841  addQRTypeForNEON(MVT::v2i64);
842 
843  if (Subtarget->hasFullFP16()) {
844  addQRTypeForNEON(MVT::v8f16);
845  addDRTypeForNEON(MVT::v4f16);
846  }
847 
848  if (Subtarget->hasBF16()) {
849  addQRTypeForNEON(MVT::v8bf16);
850  addDRTypeForNEON(MVT::v4bf16);
851  }
852  }
853 
854  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
855  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
856  // none of Neon, MVE or VFP supports any arithmetic operations on it.
860  // FIXME: Code duplication: FDIV and FREM are expanded always, see
861  // ARMTargetLowering::addTypeForNEON method for details.
864  // FIXME: Create unittest.
865  // In another words, find a way when "copysign" appears in DAG with vector
866  // operands.
868  // FIXME: Code duplication: SETCC has custom operation action, see
869  // ARMTargetLowering::addTypeForNEON method for details.
871  // FIXME: Create unittest for FNEG and for FABS.
883  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
890  }
891 
892  if (Subtarget->hasNEON()) {
893  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
894  // supported for v4f32.
909 
910  // Mark v2f32 intrinsics.
925 
926  // Neon does not support some operations on v1i64 and v2i64 types.
928  // Custom handling for some quad-vector types to detect VMULL.
932  // Custom handling for some vector types to avoid expensive expansions
937  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
938  // a destination type that is wider than the source, and nor does
939  // it have a FP_TO_[SU]INT instruction with a narrower destination than
940  // source.
949 
952 
953  // NEON does not have single instruction CTPOP for vectors with element
954  // types wider than 8-bits. However, custom lowering can leverage the
955  // v8i8/v16i8 vcnt instruction.
962 
965 
966  // NEON does not have single instruction CTTZ for vectors.
971 
976 
981 
986 
987  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
990  }
991 
992  // NEON only has FMA instructions as of VFP4.
993  if (!Subtarget->hasVFP4Base()) {
996  }
997 
1000 
1001  // It is legal to extload from v4i8 to v4i16 or v4i32.
1003  MVT::v2i32}) {
1008  }
1009  }
1010  }
1011 
1012  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1019  }
1020  if (Subtarget->hasMVEIntegerOps()) {
1023  ISD::SETCC});
1024  }
1025  if (Subtarget->hasMVEFloatOps()) {
1027  }
1028 
1029  if (!Subtarget->hasFP64()) {
1030  // When targeting a floating-point unit with only single-precision
1031  // operations, f64 is legal for the few double-precision instructions which
    // are present. However, no double-precision operations other than moves,
1033  // loads and stores are provided by the hardware.
1070  }
1071 
1072  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1075  if (Subtarget->hasFullFP16()) {
1078  }
1079  }
1080 
1081  if (!Subtarget->hasFP16()) {
1084  }
1085 
1087 
1088  // ARM does not have floating-point extending loads.
1089  for (MVT VT : MVT::fp_valuetypes()) {
1092  }
1093 
1094  // ... or truncating stores
1098 
1099  // ARM does not have i1 sign extending load.
1100  for (MVT VT : MVT::integer_valuetypes())
1102 
1103  // ARM supports all 4 flavors of integer indexed load / store.
1104  if (!Subtarget->isThumb1Only()) {
1105  for (unsigned im = (unsigned)ISD::PRE_INC;
1115  }
1116  } else {
1117  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1120  }
1121 
1126 
1129  if (Subtarget->hasDSP()) {
1138  }
1139  if (Subtarget->hasBaseDSP()) {
1142  }
1143 
1144  // i64 operation support.
1147  if (Subtarget->isThumb1Only()) {
1150  }
1151  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1152  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1154 
1164 
1165  // MVE lowers 64 bit shifts to lsll and lsrl
1166  // assuming that ISD::SRL and SRA of i64 are already marked custom
1167  if (Subtarget->hasMVEIntegerOps())
1169 
1170  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1171  if (Subtarget->isThumb1Only()) {
1175  }
1176 
1177  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1179 
1180  // ARM does not have ROTL.
1182  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1185  }
1188  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1191  }
1192 
1193  // @llvm.readcyclecounter requires the Performance Monitors extension.
1194  // Default to the 0 expansion on unsupported platforms.
1195  // FIXME: Technically there are older ARM CPUs that have
1196  // implementation-specific ways of obtaining this information.
1197  if (Subtarget->hasPerfMon())
1199 
1200  // Only ARMv6 has BSWAP.
1201  if (!Subtarget->hasV6Ops())
1203 
1204  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1205  : Subtarget->hasDivideInARMMode();
1206  if (!hasDivide) {
1207  // These are expanded into libcalls if the cpu doesn't have HW divider.
1210  }
1211 
1212  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1215 
1218  }
1219 
1222 
1223  // Register based DivRem for AEABI (RTABI 4.2)
1224  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1225  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1226  Subtarget->isTargetWindows()) {
1229  HasStandaloneRem = false;
1230 
1231  if (Subtarget->isTargetWindows()) {
1232  const struct {
1233  const RTLIB::Libcall Op;
1234  const char * const Name;
1235  const CallingConv::ID CC;
1236  } LibraryCalls[] = {
1237  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1238  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1239  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1240  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1241 
1242  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1243  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1244  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1245  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1246  };
1247 
1248  for (const auto &LC : LibraryCalls) {
1249  setLibcallName(LC.Op, LC.Name);
1250  setLibcallCallingConv(LC.Op, LC.CC);
1251  }
1252  } else {
1253  const struct {
1254  const RTLIB::Libcall Op;
1255  const char * const Name;
1256  const CallingConv::ID CC;
1257  } LibraryCalls[] = {
1258  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1259  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1260  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1261  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1262 
1263  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1264  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1265  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1266  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1267  };
1268 
1269  for (const auto &LC : LibraryCalls) {
1270  setLibcallName(LC.Op, LC.Name);
1271  setLibcallCallingConv(LC.Op, LC.CC);
1272  }
1273  }
1274 
1279  } else {
1282  }
1283 
1284  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1285  // MSVCRT doesn't have powi; fall back to pow
1286  setLibcallName(RTLIB::POWI_F32, nullptr);
1287  setLibcallName(RTLIB::POWI_F64, nullptr);
1288  }
1289 
1294 
1297 
1298  // Use the default implementation.
1305 
1306  if (Subtarget->isTargetWindows())
1308  else
1310 
1311  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1312  // the default expansion.
1313  InsertFencesForAtomic = false;
1314  if (Subtarget->hasAnyDataBarrier() &&
1315  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1316  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1317  // to ldrex/strex loops already.
1319  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1321 
1322  // On v8, we have particularly efficient implementations of atomic fences
1323  // if they can be combined with nearby atomic loads and stores.
1324  if (!Subtarget->hasAcquireRelease() ||
1325  getTargetMachine().getOptLevel() == 0) {
1326  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1327  InsertFencesForAtomic = true;
1328  }
1329  } else {
1330  // If there's anything we can use as a barrier, go through custom lowering
1331  // for ATOMIC_FENCE.
1332  // If target has DMB in thumb, Fences can be inserted.
1333  if (Subtarget->hasDataBarrier())
1334  InsertFencesForAtomic = true;
1335 
1337  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1338 
1339  // Set them all for expansion, which will force libcalls.
1352  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1353  // Unordered/Monotonic case.
1354  if (!InsertFencesForAtomic) {
1357  }
1358  }
1359 
1360  // Compute supported atomic widths.
1361  if (Subtarget->isTargetLinux() ||
1362  (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1363  // For targets where __sync_* routines are reliably available, we use them
1364  // if necessary.
1365  //
1366  // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1367  // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1368  //
1369  // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1370  // such targets should provide __sync_* routines, which use the ARM mode
1371  // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1372  // encoding; see ARMISD::MEMBARRIER_MCR.)
1374  } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1375  Subtarget->hasForced32BitAtomics()) {
1376  // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1378  } else {
1379  // We can't assume anything about other targets; just use libatomic
1380  // routines.
1382  }
1383 
1385 
1387 
1388  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1389  if (!Subtarget->hasV6Ops()) {
1392  }
1394 
1395  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1396  !Subtarget->isThumb1Only()) {
1397  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1398  // iff target supports vfp2.
1402  }
1403 
1404  // We want to custom lower some of our intrinsics.
1409  if (Subtarget->useSjLjEH())
1410  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1411 
1421  if (Subtarget->hasFullFP16()) {
1425  }
1426 
1428 
1431  if (Subtarget->hasFullFP16())
1436 
1437  // We don't support sin/cos/fmod/copysign/pow
1446  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1447  !Subtarget->isThumb1Only()) {
1450  }
1453 
1454  if (!Subtarget->hasVFP4Base()) {
1457  }
1458 
1459  // Various VFP goodness
1460  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1461  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1462  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1465  }
1466 
1467  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1468  if (!Subtarget->hasFP16()) {
1471  }
1472 
1473  // Strict floating-point comparisons need custom lowering.
1480  }
1481 
1482  // Use __sincos_stret if available.
1483  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1484  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1487  }
1488 
1489  // FP-ARMv8 implements a lot of rounding-like FP operations.
1490  if (Subtarget->hasFPARMv8Base()) {
1499  if (Subtarget->hasNEON()) {
1504  }
1505 
1506  if (Subtarget->hasFP64()) {
1515  }
1516  }
1517 
1518  // FP16 often need to be promoted to call lib functions
1519  if (Subtarget->hasFullFP16()) {
1532 
1534  }
1535 
1536  if (Subtarget->hasNEON()) {
1537  // vmin and vmax aren't available in a scalar form, so we can use
1538  // a NEON instruction with an undef lane instead. This has a performance
1539  // penalty on some cores, so we don't do this unless we have been
1540  // asked to by the core tuning model.
1541  if (Subtarget->useNEONForSinglePrecisionFP()) {
1546  }
1551 
1552  if (Subtarget->hasFullFP16()) {
1557 
1562  }
1563  }
1564 
1565  // We have target-specific dag combine patterns for the following nodes:
1566  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1569 
1570  if (Subtarget->hasMVEIntegerOps())
1572 
1573  if (Subtarget->hasV6Ops())
1575  if (Subtarget->isThumb1Only())
1577  // Attempt to lower smin/smax to ssat/usat
1578  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1579  Subtarget->isThumb2()) {
1581  }
1582 
1584 
1585  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1586  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1588  else
1590 
1591  //// temporary - rewrite interface to use type
1592  MaxStoresPerMemset = 8;
1594  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1596  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1598 
1599  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1600  // are at least 4 bytes aligned.
1602 
1603  // Prefer likely predicted branches to selects on out-of-order cores.
1604  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1605 
1606  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1607 
1608  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1609 
1610  if (Subtarget->isThumb() || Subtarget->isThumb2())
1612 }
1613 
  // Forward the soft-float query to the subtarget.
  // NOTE(review): the signature line of this predicate was lost in extraction.
  return Subtarget->useSoftFloat();
}
1617 
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
// Return the representative register class, plus a relative register-pressure
// cost multiplier, used when estimating register pressure for values of VT.
std::pair<const TargetRegisterClass *, uint8_t>
// NOTE(review): the line naming this function (findRepresentativeClass) and
// its leading parameters was lost in extraction.
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
  // NOTE(review): the default-case statement here was lost in extraction.
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers so
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  // Wider vector types occupy multiple D registers, so their pressure cost
  // scales with the number of D registers used.
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}
1665 
/// Map an ARMISD opcode to its textual name for debug printing; returns
/// nullptr for opcodes without an entry.
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define MAKE_CASE(V)                                                           \
  case V:                                                                      \
    return #V;
  switch ((ARMISD::NodeType)Opcode) {
  case ARMISD::FIRST_NUMBER:
    break;
  // NOTE(review): the long list of MAKE_CASE(ARMISD::...) entries that follows
  // FIRST_NUMBER was lost in extraction.
#undef MAKE_CASE
  }
  return nullptr;
}
1880 
/// Return the value type to use for ISD::SETCC results: pointer-sized for
/// scalars; MVE vector types get special handling via the predicate register.
/// NOTE(review): the first signature line was lost in extraction.
                                       EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);

  // MVE has a predicate register.
  if ((Subtarget->hasMVEIntegerOps() &&
       (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
        VT == MVT::v16i8)) ||
      (Subtarget->hasMVEFloatOps() &&
       (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
    // NOTE(review): the return statements for the predicate case and for the
    // generic vector case were lost in extraction.
}
1895 
1896 /// getRegClassFor - Return the register class that should be used for the
1897 /// specified value type.
1898 const TargetRegisterClass *
1899 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1900  (void)isDivergent;
1901  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1902  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1903  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1904  // MVE Q registers.
1905  if (Subtarget->hasNEON()) {
1906  if (VT == MVT::v4i64)
1907  return &ARM::QQPRRegClass;
1908  if (VT == MVT::v8i64)
1909  return &ARM::QQQQPRRegClass;
1910  }
1911  if (Subtarget->hasMVEIntegerOps()) {
1912  if (VT == MVT::v4i64)
1913  return &ARM::MQQPRRegClass;
1914  if (VT == MVT::v8i64)
1915  return &ARM::MQQQQPRRegClass;
1916  }
1917  return TargetLowering::getRegClassFor(VT);
1918 }
1919 
// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
// Reports, via MinSize/PrefAlign, the minimum copy size worth aligning for and
// the preferred alignment; returns true only for memory intrinsics.
// NOTE(review): the first signature line was lost in extraction.
                                          Align &PrefAlign) const {
  if (!isa<MemIntrinsic>(CI))
    return false;
  MinSize = 8;
  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
  // cycle faster than 4-byte aligned LDM.
  PrefAlign =
      (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
  return true;
}
1934 
// Create a fast isel object for this target by delegating to the
// ARM-specific factory.
// NOTE(review): the signature start was lost in extraction.
FastISel *
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}
1941 
  // NOTE(review): this is the body of the scheduling-preference hook; its
  // signature line was lost in extraction.
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  // Any node producing a floating-point or vector value is scheduled for ILP.
  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  // High-latency first result (> 2 cycles per the itinerary) favors ILP.
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}
1971 
1972 //===----------------------------------------------------------------------===//
1973 // Lowering Code
1974 //===----------------------------------------------------------------------===//
1975 
1976 static bool isSRL16(const SDValue &Op) {
1977  if (Op.getOpcode() != ISD::SRL)
1978  return false;
1979  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1980  return Const->getZExtValue() == 16;
1981  return false;
1982 }
1983 
1984 static bool isSRA16(const SDValue &Op) {
1985  if (Op.getOpcode() != ISD::SRA)
1986  return false;
1987  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1988  return Const->getZExtValue() == 16;
1989  return false;
1990 }
1991 
1992 static bool isSHL16(const SDValue &Op) {
1993  if (Op.getOpcode() != ISD::SHL)
1994  return false;
1995  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1996  return Const->getZExtValue() == 16;
1997  return false;
1998 }
1999 
2000 // Check for a signed 16-bit value. We special case SRA because it makes it
2001 // more simple when also looking for SRAs that aren't sign extending a
2002 // smaller value. Without the check, we'd need to take extra care with
2003 // checking order for some operations.
2004 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2005  if (isSRA16(Op))
2006  return isSHL16(Op.getOperand(0));
2007  return DAG.ComputeNumSignBits(Op) == 17;
2008 }
2009 
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
/// NOTE(review): the signature line of this mapping function was lost in
/// extraction; this is the body of the ISD::CondCode -> ARMCC conversion.
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  // Unsigned comparisons map to the unsigned ARM conditions.
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}
2026 
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
/// Some FP comparisons need a second condition (CondCode2, left as ARMCC::AL
/// when unused) because one ARM condition cannot express them.
/// NOTE(review): the first signature line was lost in extraction.
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  // SETONE needs two checks: less-than OR greater-than.
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  // SETUEQ: equal OR unordered.
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}
2055 
2056 //===----------------------------------------------------------------------===//
2057 // Calling Convention Implementation
2058 //===----------------------------------------------------------------------===//
2059 
/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
/// NOTE(review): the return-type line and several case labels / returns in
/// this switch were lost in extraction.
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
                                           bool isVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention");
  // Conventions passed through unchanged.
  case CallingConv::ARM_APCS:
  case CallingConv::GHC:
    return CC;
  // Generic C-family conventions: pick APCS/AAPCS(+VFP) from the subtarget.
  case CallingConv::Swift:
  case CallingConv::C:
  case CallingConv::Tail:
    if (!Subtarget->isAAPCS_ABI())
      return CallingConv::ARM_APCS;
    else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
    // NOTE(review): the hard-float (AAPCS_VFP) return was lost in extraction.
    else
      return CallingConv::ARM_AAPCS;
  case CallingConv::Fast:
    if (!Subtarget->isAAPCS_ABI()) {
      // Fast CC requires VFP2, non-Thumb1 and no varargs; otherwise APCS.
      if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
        return CallingConv::Fast;
      return CallingConv::ARM_APCS;
    } else if (Subtarget->hasVFP2Base() &&
               !Subtarget->isThumb1Only() && !isVarArg)
    // NOTE(review): the hard-float (AAPCS_VFP) return was lost in extraction.
    else
      return CallingConv::ARM_AAPCS;
  }
}
2103 
// Select the CCAssignFn used to analyze *arguments* for the given convention.
// NOTE(review): the signature start was lost in extraction.
                                     bool isVarArg) const {
  return CCAssignFnForNode(CC, false, isVarArg);
}
2108 
// Select the CCAssignFn used to analyze *return values* for the convention.
// NOTE(review): the signature start was lost in extraction.
                                       bool isVarArg) const {
  return CCAssignFnForNode(CC, true, isVarArg);
}
2113 
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  // Dispatch on the *effective* convention so ABI / FP-hardware adjustments
  // are applied first.
  switch (getEffectiveCallingConv(CC, isVarArg)) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  // NOTE(review): the case labels for the following returns (AAPCS,
  // AAPCS_VFP, Swift/Tail variants, CFGuard check) were lost in extraction.
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::Fast:
    return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  }
}
2138 
2139 SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2140  MVT LocVT, MVT ValVT, SDValue Val) const {
2141  Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2142  Val);
2143  if (Subtarget->hasFullFP16()) {
2144  Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2145  } else {
2146  Val = DAG.getNode(ISD::TRUNCATE, dl,
2147  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2148  Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2149  }
2150  return Val;
2151 }
2152 
2153 SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2154  MVT LocVT, MVT ValVT,
2155  SDValue Val) const {
2156  if (Subtarget->hasFullFP16()) {
2157  Val = DAG.getNode(ARMISD::VMOVrh, dl,
2158  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2159  } else {
2160  Val = DAG.getNode(ISD::BITCAST, dl,
2161  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2162  Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2163  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2164  }
2165  return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2166 }
2167 
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
    SDValue ThisVal) const {
  // Assign locations to each value returned by this call.
  // NOTE(review): the declaration of RVLocs (a SmallVector of CCValAssign)
  // was lost in extraction here.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass 'this' value directly from the argument to return value, to avoid
    // reg unit interference
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    SDValue Val;
    if (VA.needsCustom() &&
        (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
      // Handle f64 or half of a v2f64: each f64 is returned as two i32 GPR
      // copies that are recombined into a double with VMOVDRR.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      // Big-endian targets receive the halves swapped.
      if (!Subtarget->isLittle())
        std::swap (Lo, Hi);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        // Build the v2f64 by inserting both f64 halves into an UNDEF vector.
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, dl, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        if (!Subtarget->isLittle())
          std::swap (Lo, Hi);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, dl, MVT::i32));
      }
    } else {
      // Simple case: one register copy in the location type.
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    // f16 arguments have their size extended to 4 bytes and passed as if they
    // had been copied to the LSBs of a 32-bit register.
    // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
    if (VA.needsCustom() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
      Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);

    InVals.push_back(Val);
  }

  return Chain;
}
2257 
/// Compute the address (and pointer info) at which an outgoing call argument
/// assigned to a stack location should be stored.
std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
    const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
    bool IsTailCall, int SPDiff) const {
  SDValue DstAddr;
  MachinePointerInfo DstInfo;
  int32_t Offset = VA.getLocMemOffset();
  MachineFunction &MF = DAG.getMachineFunction();

  if (IsTailCall) {
    // Tail call: store into the caller's own incoming-argument area, adjusted
    // by SPDiff, through a fixed frame index.
    Offset += SPDiff;
    auto PtrVT = getPointerTy(DAG.getDataLayout());
    int Size = VA.getLocVT().getFixedSizeInBits() / 8;
    int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
    DstAddr = DAG.getFrameIndex(FI, PtrVT);
    DstInfo =
    // NOTE(review): the initializer (fixed-stack pointer info) was lost in
    // extraction.
  } else {
    // Normal call: the address is SP plus the location's byte offset.
    SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
    DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                          StackPtr, PtrOff);
    DstInfo =
    // NOTE(review): the initializer (stack pointer info) was lost in
    // extraction.
  }

  return std::make_pair(DstAddr, DstInfo);
}
2284 
2285 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2286  SDValue Chain, SDValue &Arg,
2287  RegsToPassVector &RegsToPass,
2288  CCValAssign &VA, CCValAssign &NextVA,
2289  SDValue &StackPtr,
2290  SmallVectorImpl<SDValue> &MemOpChains,
2291  bool IsTailCall,
2292  int SPDiff) const {
2293  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2294  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2295  unsigned id = Subtarget->isLittle() ? 0 : 1;
2296  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2297 
2298  if (NextVA.isRegLoc())
2299  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2300  else {
2301  assert(NextVA.isMemLoc());
2302  if (!StackPtr.getNode())
2303  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2304  getPointerTy(DAG.getDataLayout()));
2305 
2306  SDValue DstAddr;
2307  MachinePointerInfo DstInfo;
2308  std::tie(DstAddr, DstInfo) =
2309  computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2310  MemOpChains.push_back(
2311  DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2312  }
2313 }
2314 
/// Return true if tail-call optimization can be *guaranteed* (rather than
/// merely attempted) for the given calling convention.
///
/// NOTE(review): original line 2317 is missing from this extraction — the
/// visible `||` implies additional convention checks (presumably
/// CallingConv::Tail / CallingConv::SwiftTail); confirm against upstream.
2315 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2316  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2318 }
2319 
2320 /// LowerCall - Lowering a call into a callseq_start <-
2321 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2322 /// nodes.
///
/// NOTE(review): this extraction is missing numerous original lines
/// (declarations such as Outs/Ins/ArgLocs/CSInfo/AFI, some EXTRACT_VECTOR_ELT
/// operands, constant-pool-value constructions, and diagnostic
/// constructions). Names used below without a visible declaration come from
/// those missing lines — verify against the upstream file.
2323 SDValue
2324 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2325  SmallVectorImpl<SDValue> &InVals) const {
2326  SelectionDAG &DAG = CLI.DAG;
2327  SDLoc &dl = CLI.DL;
2329  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2331  SDValue Chain = CLI.Chain;
2332  SDValue Callee = CLI.Callee;
2333  bool &isTailCall = CLI.IsTailCall;
2334  CallingConv::ID CallConv = CLI.CallConv;
2335  bool doesNotRet = CLI.DoesNotReturn;
2336  bool isVarArg = CLI.IsVarArg;
2337 
2338  MachineFunction &MF = DAG.getMachineFunction();
2341  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2342  bool isThisReturn = false;
2343  bool isCmseNSCall = false;
2344  bool isSibCall = false;
2345  bool PreferIndirect = false;
2346  bool GuardWithBTI = false;
2347 
2348  // Lower 'returns_twice' calls to a pseudo-instruction.
2349  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2350  !Subtarget->noBTIAtReturnTwice())
2351  GuardWithBTI = AFI->branchTargetEnforcement();
2352 
2353  // Determine whether this is a non-secure function call.
2354  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2355  isCmseNSCall = true;
2356 
2357  // Disable tail calls if they're not supported.
2358  if (!Subtarget->supportsTailCall())
2359  isTailCall = false;
2360 
2361  // For both the non-secure calls and the returns from a CMSE entry function,
2362  // the function needs to do some extra work after the call, or before the
2363  // return, respectively, thus it cannot end with a tail call
2364  if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2365  isTailCall = false;
2366 
2367  if (isa<GlobalAddressSDNode>(Callee)) {
2368  // If we're optimizing for minimum size and the function is called three or
2369  // more times in this block, we can improve codesize by calling indirectly
2370  // as BLXr has a 16-bit encoding.
2371  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2372  if (CLI.CB) {
2373  auto *BB = CLI.CB->getParent();
2374  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2375  count_if(GV->users(), [&BB](const User *U) {
2376  return isa<Instruction>(U) &&
2377  cast<Instruction>(U)->getParent() == BB;
2378  }) > 2;
2379  }
2380  }
2381  if (isTailCall) {
2382  // Check if it's really possible to do a tail call.
2383  isTailCall = IsEligibleForTailCallOptimization(
2384  Callee, CallConv, isVarArg, isStructRet,
2385  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2386  PreferIndirect);
2387 
2388  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2389  CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2390  isSibCall = true;
2391 
2392  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2393  // detected sibcalls.
2394  if (isTailCall)
2395  ++NumTailCalls;
2396  }
2397 
2398  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2399  report_fatal_error("failed to perform tail call elimination on a call "
2400  "site marked musttail");
2401  // Analyze operands of the call, assigning locations to each operand.
2403  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2404  *DAG.getContext());
2405  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2406 
2407  // Get a count of how many bytes are to be pushed on the stack.
2408  unsigned NumBytes = CCInfo.getNextStackOffset();
2409 
2410  // SPDiff is the byte offset of the call's argument area from the callee's.
2411  // Stores to callee stack arguments will be placed in FixedStackSlots offset
2412  // by this amount for a tail call. In a sibling call it must be 0 because the
2413  // caller will deallocate the entire stack and the callee still expects its
2414  // arguments to begin at SP+0. Completely unused for non-tail calls.
2415  int SPDiff = 0;
2416 
2417  if (isTailCall && !isSibCall) {
2418  auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2419  unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2420 
2421  // Since callee will pop argument stack as a tail call, we must keep the
2422  // popped size 16-byte aligned.
2424  NumBytes = alignTo(NumBytes, StackAlign);
2425 
2426  // SPDiff will be negative if this tail call requires more space than we
2427  // would automatically have in our incoming argument space. Positive if we
2428  // can actually shrink the stack.
2429  SPDiff = NumReusableBytes - NumBytes;
2430 
2431  // If this call requires more stack than we have available from
2432  // LowerFormalArguments, tell FrameLowering to reserve space for it.
2433  if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2434  AFI->setArgRegsSaveSize(-SPDiff);
2435  }
2436 
2437  if (isSibCall) {
2438  // For sibling tail calls, memory operands are available in our caller's stack.
2439  NumBytes = 0;
2440  } else {
2441  // Adjust the stack pointer for the new arguments...
2442  // These operations are automatically eliminated by the prolog/epilog pass
2443  Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2444  }
2445 
2446  SDValue StackPtr =
2447  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2448 
2449  RegsToPassVector RegsToPass;
2450  SmallVector<SDValue, 8> MemOpChains;
2451 
2452  // During a tail call, stores to the argument area must happen after all of
2453  // the function's incoming arguments have been loaded because they may alias.
2454  // This is done by folding in a TokenFactor from LowerFormalArguments, but
2455  // there's no point in doing so repeatedly so this tracks whether that's
2456  // happened yet.
2457  bool AfterFormalArgLoads = false;
2458 
2459  // Walk the register/memloc assignments, inserting copies/loads. In the case
2460  // of tail call optimization, arguments are handled later.
2461  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2462  i != e;
2463  ++i, ++realArgIdx) {
2464  CCValAssign &VA = ArgLocs[i];
2465  SDValue Arg = OutVals[realArgIdx];
2466  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2467  bool isByVal = Flags.isByVal();
2468 
2469  // Promote the value if needed.
2470  switch (VA.getLocInfo()) {
2471  default: llvm_unreachable("Unknown loc info!");
2472  case CCValAssign::Full: break;
2473  case CCValAssign::SExt:
2474  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2475  break;
2476  case CCValAssign::ZExt:
2477  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2478  break;
2479  case CCValAssign::AExt:
2480  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2481  break;
2482  case CCValAssign::BCvt:
2483  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2484  break;
2485  }
2486 
2487  if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2488  Chain = DAG.getStackArgumentTokenFactor(Chain);
2489  AfterFormalArgLoads = true;
2490  }
2491 
2492  // f16 arguments have their size extended to 4 bytes and passed as if they
2493  // had been copied to the LSBs of a 32-bit register.
2494  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2495  if (VA.needsCustom() &&
2496  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2497  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2498  } else {
2499  // f16 arguments could have been extended prior to argument lowering.
2500  // Mask them arguments if this is a CMSE nonsecure call.
2501  auto ArgVT = Outs[realArgIdx].ArgVT;
2502  if (isCmseNSCall && (ArgVT == MVT::f16)) {
2503  auto LocBits = VA.getLocVT().getSizeInBits();
2504  auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2505  SDValue Mask =
2506  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2507  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2508  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2509  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2510  }
2511  }
2512 
2513  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2514  if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2516  DAG.getConstant(0, dl, MVT::i32));
2518  DAG.getConstant(1, dl, MVT::i32));
2519 
2520  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2521  StackPtr, MemOpChains, isTailCall, SPDiff);
2522 
2523  VA = ArgLocs[++i]; // skip ahead to next loc
2524  if (VA.isRegLoc()) {
2525  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2526  StackPtr, MemOpChains, isTailCall, SPDiff);
2527  } else {
2528  assert(VA.isMemLoc());
2529  SDValue DstAddr;
2530  MachinePointerInfo DstInfo;
2531  std::tie(DstAddr, DstInfo) =
2532  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2533  MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2534  }
2535  } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2536  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2537  StackPtr, MemOpChains, isTailCall, SPDiff);
2538  } else if (VA.isRegLoc()) {
2539  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2540  Outs[0].VT == MVT::i32) {
2541  assert(VA.getLocVT() == MVT::i32 &&
2542  "unexpected calling convention register assignment");
2543  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2544  "unexpected use of 'returned'");
2545  isThisReturn = true;
2546  }
2547  const TargetOptions &Options = DAG.getTarget().Options;
2548  if (Options.EmitCallSiteInfo)
2549  CSInfo.emplace_back(VA.getLocReg(), i);
2550  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2551  } else if (isByVal) {
2552  assert(VA.isMemLoc());
2553  unsigned offset = 0;
2554 
2555  // True if this byval aggregate will be split between registers
2556  // and memory.
2557  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2558  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2559 
2560  if (CurByValIdx < ByValArgsCount) {
2561 
2562  unsigned RegBegin, RegEnd;
2563  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2564 
2565  EVT PtrVT =
2567  unsigned int i, j;
2568  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2569  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2570  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2571  SDValue Load =
2572  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2573  DAG.InferPtrAlign(AddArg));
2574  MemOpChains.push_back(Load.getValue(1));
2575  RegsToPass.push_back(std::make_pair(j, Load));
2576  }
2577 
2578  // If parameter size outsides register area, "offset" value
2579  // helps us to calculate stack slot for remained part properly.
2580  offset = RegEnd - RegBegin;
2581 
2582  CCInfo.nextInRegsParam();
2583  }
2584 
2585  if (Flags.getByValSize() > 4*offset) {
2586  auto PtrVT = getPointerTy(DAG.getDataLayout());
2587  SDValue Dst;
2588  MachinePointerInfo DstInfo;
2589  std::tie(Dst, DstInfo) =
2590  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2591  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2592  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2593  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2594  MVT::i32);
2595  SDValue AlignNode =
2596  DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2597 
2598  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2599  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2600  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2601  Ops));
2602  }
2603  } else {
2604  assert(VA.isMemLoc());
2605  SDValue DstAddr;
2606  MachinePointerInfo DstInfo;
2607  std::tie(DstAddr, DstInfo) =
2608  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2609 
2610  SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2611  MemOpChains.push_back(Store);
2612  }
2613  }
2614 
2615  if (!MemOpChains.empty())
2616  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2617 
2618  // Build a sequence of copy-to-reg nodes chained together with token chain
2619  // and flag operands which copy the outgoing args into the appropriate regs.
2620  SDValue InFlag;
2621  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2622  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2623  RegsToPass[i].second, InFlag);
2624  InFlag = Chain.getValue(1);
2625  }
2626 
2627  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2628  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2629  // node so that legalize doesn't hack it.
2630  bool isDirect = false;
2631 
2632  const TargetMachine &TM = getTargetMachine();
2633  const Module *Mod = MF.getFunction().getParent();
2634  const GlobalValue *GVal = nullptr;
2635  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2636  GVal = G->getGlobal();
2637  bool isStub =
2638  !TM.shouldAssumeDSOLocal(*Mod, GVal) && Subtarget->isTargetMachO();
2639 
2640  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2641  bool isLocalARMFunc = false;
2642  auto PtrVt = getPointerTy(DAG.getDataLayout());
2643 
2644  if (Subtarget->genLongCalls()) {
2645  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2646  "long-calls codegen is not position independent!");
2647  // Handle a global address or an external symbol. If it's not one of
2648  // those, the target's already in a register, so we don't need to do
2649  // anything extra.
2650  if (isa<GlobalAddressSDNode>(Callee)) {
2651  // When generating execute-only code we use movw movt pair.
2652  // Currently execute-only is only available for architectures that
2653  // support movw movt, so we are safe to assume that.
2654  if (Subtarget->genExecuteOnly()) {
2655  assert(Subtarget->useMovt() &&
2656  "long-calls with execute-only requires movt and movw!");
2657  ++NumMovwMovt;
2658  Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2659  DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2660  } else {
2661  // Create a constant pool entry for the callee address
2662  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2664  GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2665 
2666  // Get the address of the callee into a register
2667  SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2668  Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2669  Callee = DAG.getLoad(
2670  PtrVt, dl, DAG.getEntryNode(), Addr,
2672  }
2673  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2674  const char *Sym = S->getSymbol();
2675 
2676  // When generating execute-only code we use movw movt pair.
2677  // Currently execute-only is only available for architectures that
2678  // support movw movt, so we are safe to assume that.
2679  if (Subtarget->genExecuteOnly()) {
2680  assert(Subtarget->useMovt() &&
2681  "long-calls with execute-only requires movt and movw!");
2682  ++NumMovwMovt;
2683  Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2684  DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2685  } else {
2686  // Create a constant pool entry for the callee address
2687  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2689  *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2690 
2691  // Get the address of the callee into a register
2692  SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2693  Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2694  Callee = DAG.getLoad(
2695  PtrVt, dl, DAG.getEntryNode(), Addr,
2697  }
2698  }
2699  } else if (isa<GlobalAddressSDNode>(Callee)) {
2700  if (!PreferIndirect) {
2701  isDirect = true;
2702  bool isDef = GVal->isStrongDefinitionForLinker();
2703 
2704  // ARM call to a local ARM function is predicable.
2705  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2706  // tBX takes a register source operand.
2707  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2708  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2709  Callee = DAG.getNode(
2710  ARMISD::WrapperPIC, dl, PtrVt,
2711  DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2712  Callee = DAG.getLoad(
2713  PtrVt, dl, DAG.getEntryNode(), Callee,
2717  } else if (Subtarget->isTargetCOFF()) {
2718  assert(Subtarget->isTargetWindows() &&
2719  "Windows is the only supported COFF target");
2720  unsigned TargetFlags = ARMII::MO_NO_FLAG;
2721  if (GVal->hasDLLImportStorageClass())
2722  TargetFlags = ARMII::MO_DLLIMPORT;
2723  else if (!TM.shouldAssumeDSOLocal(*GVal->getParent(), GVal))
2724  TargetFlags = ARMII::MO_COFFSTUB;
2725  Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2726  TargetFlags);
2727  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2728  Callee =
2729  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2730  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2732  } else {
2733  Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2734  }
2735  }
2736  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2737  isDirect = true;
2738  // tBX takes a register source operand.
2739  const char *Sym = S->getSymbol();
2740  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2741  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2742  ARMConstantPoolValue *CPV =
2744  ARMPCLabelIndex, 4);
2745  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2746  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2747  Callee = DAG.getLoad(
2748  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2750  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2751  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2752  } else {
2753  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2754  }
2755  }
2756 
2757  if (isCmseNSCall) {
2758  assert(!isARMFunc && !isDirect &&
2759  "Cannot handle call to ARM function or direct call");
2760  if (NumBytes > 0) {
2762  "call to non-secure function would "
2763  "require passing arguments on stack",
2764  dl.getDebugLoc());
2765  DAG.getContext()->diagnose(Diag);
2766  }
2767  if (isStructRet) {
2770  "call to non-secure function would return value through pointer",
2771  dl.getDebugLoc());
2772  DAG.getContext()->diagnose(Diag);
2773  }
2774  }
2775 
2776  // FIXME: handle tail calls differently.
2777  unsigned CallOpc;
2778  if (Subtarget->isThumb()) {
2779  if (GuardWithBTI)
2780  CallOpc = ARMISD::t2CALL_BTI;
2781  else if (isCmseNSCall)
2782  CallOpc = ARMISD::tSECALL;
2783  else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2784  CallOpc = ARMISD::CALL_NOLINK;
2785  else
2786  CallOpc = ARMISD::CALL;
2787  } else {
2788  if (!isDirect && !Subtarget->hasV5TOps())
2789  CallOpc = ARMISD::CALL_NOLINK;
2790  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2791  // Emit regular call when code size is the priority
2792  !Subtarget->hasMinSize())
2793  // "mov lr, pc; b _foo" to avoid confusing the RSP
2794  CallOpc = ARMISD::CALL_NOLINK;
2795  else
2796  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2797  }
2798 
2799  // We don't usually want to end the call-sequence here because we would tidy
2800  // the frame up *after* the call, however in the ABI-changing tail-call case
2801  // we've carefully laid out the parameters so that when sp is reset they'll be
2802  // in the correct location.
2803  if (isTailCall && !isSibCall) {
2804  Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, dl);
2805  InFlag = Chain.getValue(1);
2806  }
2807 
2808  std::vector<SDValue> Ops;
2809  Ops.push_back(Chain);
2810  Ops.push_back(Callee);
2811 
2812  if (isTailCall) {
2813  Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2814  }
2815 
2816  // Add argument registers to the end of the list so that they are known live
2817  // into the call.
2818  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2819  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2820  RegsToPass[i].second.getValueType()));
2821 
2822  // Add a register mask operand representing the call-preserved registers.
2823  const uint32_t *Mask;
2824  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2825  if (isThisReturn) {
2826  // For 'this' returns, use the R0-preserving mask if applicable
2827  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2828  if (!Mask) {
2829  // Set isThisReturn to false if the calling convention is not one that
2830  // allows 'returned' to be modeled in this way, so LowerCallResult does
2831  // not try to pass 'this' straight through
2832  isThisReturn = false;
2833  Mask = ARI->getCallPreservedMask(MF, CallConv);
2834  }
2835  } else
2836  Mask = ARI->getCallPreservedMask(MF, CallConv);
2837 
2838  assert(Mask && "Missing call preserved mask for calling convention");
2839  Ops.push_back(DAG.getRegisterMask(Mask));
2840 
2841  if (InFlag.getNode())
2842  Ops.push_back(InFlag);
2843 
2844  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2845  if (isTailCall) {
2847  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2848  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2849  return Ret;
2850  }
2851 
2852  // Returns a chain and a flag for retval copy to use.
2853  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2854  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2855  InFlag = Chain.getValue(1);
2856  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2857 
2858  // If we're guaranteeing tail-calls will be honoured, the callee must
2859  // pop its own argument stack on return. But this call is *not* a tail call so
2860  // we need to undo that after it returns to restore the status-quo.
2861  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2862  uint64_t CalleePopBytes =
2863  canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2864 
2865  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, dl);
2866  if (!Ins.empty())
2867  InFlag = Chain.getValue(1);
2868 
2869  // Handle result values, copying them out of physregs into vregs that we
2870  // return.
2871  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2872  InVals, isThisReturn,
2873  isThisReturn ? OutVals[0] : SDValue());
2874 }
2875 
2876 /// HandleByVal - Every parameter *after* a byval parameter is passed
2877 /// on the stack. Remember the next parameter register to allocate,
2878 /// and then confiscate the rest of the parameter registers to insure
2879 /// this.
2880 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2881  Align Alignment) const {
2882  // Byval (as with any stack) slots are always at least 4 byte aligned.
2883  Alignment = std::max(Alignment, Align(4));
2884 
2885  unsigned Reg = State->AllocateReg(GPRArgRegs);
2886  if (!Reg)
2887  return;
2888 
2889  unsigned AlignInRegs = Alignment.value() / 4;
2890  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2891  for (unsigned i = 0; i < Waste; ++i)
2892  Reg = State->AllocateReg(GPRArgRegs);
2893 
2894  if (!Reg)
2895  return;
2896 
2897  unsigned Excess = 4 * (ARM::R4 - Reg);
2898 
2899  // Special case when NSAA != SP and parameter size greater than size of
2900  // all remained GPR regs. In that case we can't split parameter, we must
2901  // send it to stack. We also must set NCRN to R4, so waste all
2902  // remained registers.
2903  const unsigned NSAAOffset = State->getNextStackOffset();
2904  if (NSAAOffset != 0 && Size > Excess) {
2905  while (State->AllocateReg(GPRArgRegs))
2906  ;
2907  return;
2908  }
2909 
2910  // First register for byval parameter is the first register that wasn't
2911  // allocated before this method call, so it would be "reg".
2912  // If parameter is small enough to be saved in range [reg, r4), then
2913  // the end (first after last) register would be reg + param-size-in-regs,
2914  // else parameter would be splitted between registers and stack,
2915  // end register would be r4 in this case.
2916  unsigned ByValRegBegin = Reg;
2917  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2918  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2919  // Note, first register is allocated in the beginning of function already,
2920  // allocate remained amount of registers we need.
2921  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2922  State->AllocateReg(GPRArgRegs);
2923  // A byval parameter that is split between registers and memory needs its
2924  // size truncated here.
2925  // In the case where the entire structure fits in registers, we set the
2926  // size in memory to zero.
2927  Size = std::max<int>(Size - Excess, 0);
2928 }
2929 
2930 /// MatchingStackOffset - Return true if the given stack call argument is
2931 /// already available in the same position (relatively) of the caller's
2932 /// incoming argument stack.
///
/// The argument must either be a CopyFromReg whose vreg is defined by a
/// load from a fixed stack slot, or a direct load from a frame index; in
/// both cases the slot's offset and size must match exactly.
///
/// NOTE(review): original lines 2934-2935 (the first parameters of the
/// signature — presumably `SDValue Arg, unsigned Offset,
/// ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo
/// *MRI,`) and 2968 are missing from this extraction; confirm upstream.
2933 static
2936  const TargetInstrInfo *TII) {
2937  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2938  int FI = std::numeric_limits<int>::max();
2939  if (Arg.getOpcode() == ISD::CopyFromReg) {
2940  Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2941  if (!VR.isVirtual())
2942  return false;
2943  MachineInstr *Def = MRI->getVRegDef(VR);
2944  if (!Def)
2945  return false;
2946  if (!Flags.isByVal()) {
2947  if (!TII->isLoadFromStackSlot(*Def, FI))
2948  return false;
2949  } else {
2950  return false;
2951  }
2952  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2953  if (Flags.isByVal())
2954  // ByVal argument is passed in as a pointer but it's now being
2955  // dereferenced. e.g.
2956  // define @foo(%struct.X* %A) {
2957  // tail call @bar(%struct.X* byval %A)
2958  // }
2959  return false;
2960  SDValue Ptr = Ld->getBasePtr();
2961  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2962  if (!FINode)
2963  return false;
2964  FI = FINode->getIndex();
2965  } else
2966  return false;
2967 
2969  if (!MFI.isFixedObjectIndex(FI))
2970  return false;
2971  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2972 }
2973 
2974 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2975 /// for tail call optimization. Targets which want to do tail call
2976 /// optimization should implement this function.
///
/// Rejection criteria visible below: register pressure on Thumb1 indirect
/// calls with 4+ outgoing args; interrupt handlers; sret mismatches;
/// external-weak callees on non-Windows targets; incompatible result
/// passing or callee-saved masks; split vararg/byval in the caller's frame;
/// and stack arguments not already in matching caller slots.
///
/// NOTE(review): original lines 2982, 3001, 3031, 3039 and 3066 are missing
/// from this extraction (among them the `Ins`/`DAG` parameters, a return
/// inside the r12/PAC check, the Triple declaration, the
/// CCState::resultsCompatible call head, and the ArgLocs declaration);
/// confirm against the upstream file.
2977 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2978  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2979  bool isCalleeStructRet, bool isCallerStructRet,
2980  const SmallVectorImpl<ISD::OutputArg> &Outs,
2981  const SmallVectorImpl<SDValue> &OutVals,
2983  const bool isIndirect) const {
2984  MachineFunction &MF = DAG.getMachineFunction();
2985  const Function &CallerF = MF.getFunction();
2986  CallingConv::ID CallerCC = CallerF.getCallingConv();
2987 
2988  assert(Subtarget->supportsTailCall());
2989 
2990  // Indirect tail calls cannot be optimized for Thumb1 if the args
2991  // to the call take up r0-r3. The reason is that there are no legal registers
2992  // left to hold the pointer to the function to be called.
2993  // Similarly, if the function uses return address sign and authentication,
2994  // r12 is needed to hold the PAC and is not available to hold the callee
2995  // address.
2996  if (Outs.size() >= 4 &&
2997  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
2998  if (Subtarget->isThumb1Only())
2999  return false;
3000  // Conservatively assume the function spills LR.
3002  return false;
3003  }
3004 
3005  // Look for obvious safe cases to perform tail call optimization that do not
3006  // require ABI changes. This is what gcc calls sibcall.
3007 
3008  // Exception-handling functions need a special set of instructions to indicate
3009  // a return to the hardware. Tail-calling another function would probably
3010  // break this.
3011  if (CallerF.hasFnAttribute("interrupt"))
3012  return false;
3013 
3014  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3015  return CalleeCC == CallerCC;
3016 
3017  // Also avoid sibcall optimization if either caller or callee uses struct
3018  // return semantics.
3019  if (isCalleeStructRet || isCallerStructRet)
3020  return false;
3021 
3022  // Externally-defined functions with weak linkage should not be
3023  // tail-called on ARM when the OS does not support dynamic
3024  // pre-emption of symbols, as the AAELF spec requires normal calls
3025  // to undefined weak functions to be replaced with a NOP or jump to the
3026  // next instruction. The behaviour of branch instructions in this
3027  // situation (as used for tail calls) is implementation-defined, so we
3028  // cannot rely on the linker replacing the tail call with a return.
3029  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3030  const GlobalValue *GV = G->getGlobal();
3032  if (GV->hasExternalWeakLinkage() &&
3033  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3034  return false;
3035  }
3036 
3037  // Check that the call results are passed in the same way.
3038  LLVMContext &C = *DAG.getContext();
3040  getEffectiveCallingConv(CalleeCC, isVarArg),
3041  getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3042  CCAssignFnForReturn(CalleeCC, isVarArg),
3043  CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3044  return false;
3045  // The callee has to preserve all registers the caller needs to preserve.
3046  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3047  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3048  if (CalleeCC != CallerCC) {
3049  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3050  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3051  return false;
3052  }
3053 
3054  // If Caller's vararg or byval argument has been split between registers and
3055  // stack, do not perform tail call, since part of the argument is in caller's
3056  // local frame.
3057  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3058  if (AFI_Caller->getArgRegsSaveSize())
3059  return false;
3060 
3061  // If the callee takes no arguments then go on to check the results of the
3062  // call.
3063  if (!Outs.empty()) {
3064  // Check if stack adjustment is needed. For now, do not do this if any
3065  // argument is passed on the stack.
3067  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3068  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3069  if (CCInfo.getNextStackOffset()) {
3070  // Check if the arguments are already laid out in the right way as
3071  // the caller's fixed stack objects.
3072  MachineFrameInfo &MFI = MF.getFrameInfo();
3073  const MachineRegisterInfo *MRI = &MF.getRegInfo();
3074  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3075  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3076  i != e;
3077  ++i, ++realArgIdx) {
3078  CCValAssign &VA = ArgLocs[i];
3079  EVT RegVT = VA.getLocVT();
3080  SDValue Arg = OutVals[realArgIdx];
3081  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3082  if (VA.getLocInfo() == CCValAssign::Indirect)
3083  return false;
3084  if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3085  // f64 and vector types are split into multiple registers or
3086  // register/stack-slot combinations. The types will not match
3087  // the registers; give up on memory f64 refs until we figure
3088  // out what to do about this.
3089  if (!VA.isRegLoc())
3090  return false;
3091  if (!ArgLocs[++i].isRegLoc())
3092  return false;
3093  if (RegVT == MVT::v2f64) {
3094  if (!ArgLocs[++i].isRegLoc())
3095  return false;
3096  if (!ArgLocs[++i].isRegLoc())
3097  return false;
3098  }
3099  } else if (!VA.isRegLoc()) {
3100  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3101  MFI, MRI, TII))
3102  return false;
3103  }
3104  }
3105  }
3106 
3107  const MachineRegisterInfo &MRI = MF.getRegInfo();
3108  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3109  return false;
3110  }
3111 
3112  return true;
3113 }
3114 
/// Check whether all of the function's return values in \p Outs can be
/// assigned to return locations by the calling convention \p CallConv.
bool
ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  // Delegate to the generic CC machinery with the ARM return-assignment
  // function selected for this calling convention.
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
}
3124 
3126  const SDLoc &DL, SelectionDAG &DAG) {
3127  const MachineFunction &MF = DAG.getMachineFunction();
3128  const Function &F = MF.getFunction();
3129 
3130  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3131 
3132  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3133  // version of the "preferred return address". These offsets affect the return
3134  // instruction if this is a return from PL1 without hypervisor extensions.
3135  // IRQ/FIQ: +4 "subs pc, lr, #4"
3136  // SWI: 0 "subs pc, lr, #0"
3137  // ABORT: +4 "subs pc, lr, #4"
3138  // UNDEF: +4/+2 "subs pc, lr, #0"
3139  // UNDEF varies depending on where the exception came from ARM or Thumb
3140  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3141 
3142  int64_t LROffset;
3143  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3144  IntKind == "ABORT")
3145  LROffset = 4;
3146  else if (IntKind == "SWI" || IntKind == "UNDEF")
3147  LROffset = 0;
3148  else
3149  report_fatal_error("Unsupported interrupt attribute. If present, value "
3150  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3151 
3152  RetOps.insert(RetOps.begin() + 1,
3153  DAG.getConstant(LROffset, DL, MVT::i32, false));
3154 
3155  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
3156 }
3157 
/// Lower an IR 'ret': assign each return value to its CC-mandated register,
/// emit CopyToReg nodes glued together, and build the final return node (or an
/// interrupt return for functions carrying the "interrupt" attribute).
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to a location.

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  bool isLittleEndian = Subtarget->isLittle();

  MachineFunction &MF = DAG.getMachineFunction();
  AFI->setReturnRegsCount(RVLocs.size());

 // Report error if cmse entry function returns structure through first ptr arg.
  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
    // Note: using an empty SDLoc(), as the first line of the function is a
    // better place to report than the last line.
        "secure entry function would return value through pointer",
        SDLoc().getDebugLoc());
    DAG.getContext()->diagnose(Diag);
  }

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[realRVLocIdx];
    bool ReturnF16 = false;

    if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
      // Half-precision return values can be returned like this:
      //
      // t11 f16 = fadd ...
      // t12: i16 = bitcast t11
      // t13: i32 = zero_extend t12
      // t14: f32 = bitcast t13  <~~~~~~~ Arg
      //
      // to avoid code generation for bitcasts, we simply set Arg to the node
      // that produces the f16 value, t11 in this case.
      //
      if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
        SDValue ZE = Arg.getOperand(0);
        if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
          SDValue BC = ZE.getOperand(0);
          if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
            Arg = BC.getOperand(0);
            ReturnF16 = true;
          }
        }
      }
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      if (!ReturnF16)
        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // Mask f16 arguments if this is a CMSE nonsecure entry.
    auto RetVT = Outs[realRVLocIdx].ArgVT;
    if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
      if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
        Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
      } else {
        // AND away the bits above the f16 payload so no extra data is left in
        // the full-width location register on a nonsecure return.
        auto LocBits = VA.getLocVT().getSizeInBits();
        auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
        SDValue Mask =
            DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
        Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      }
    }

    if (VA.needsCustom() &&
        (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
            DAG.getConstant(0, dl, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain =
            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
        Flag = Chain.getValue(1);
        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain =
            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
        Flag = Chain.getValue(1);
        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
            DAG.getConstant(1, dl, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), Arg);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                               fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
      Flag = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                               fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(
        VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
  }
  // Any callee-saved registers preserved via explicit copies (rather than the
  // normal prologue/epilogue spills) are added as implicit return operands so
  // they remain live up to the return.
  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (ARM::GPRRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i32));
      else if (ARM::DPRRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  // CPUs which aren't M-class use a special sequence to return from
  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
  // though we use "subs pc, lr, #N").
  //
  // M-class CPUs actually use a normal return sequence with a special
  // (hardware-provided) value in LR, so the normal code path works.
  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
      !Subtarget->isMClass()) {
    if (Subtarget->isThumb1Only())
      report_fatal_error("interrupt attribute is not supported in Thumb1");
    return LowerInterruptReturn(RetOps, dl, DAG);
  }

  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
}
3334 
/// Return true if N's only consumers (through at most one register copy, a
/// VMOVRRD pair of copies, or an f32 bitcast) are ARM return nodes. On
/// success, Chain is updated to the chain consumed by those copies so the
/// caller can fold N into the return sequence.
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  // Only single-result nodes with exactly one user are candidates.
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDValue TCChain = Chain;
  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() == ISD::CopyToReg) {
    // If the copy has a glue operand, we conservatively assume it isn't safe to
    // perform a tail call.
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
    SDNode *VMov = Copy;
    // f64 returned in a pair of GPRs.
    for (SDNode *U : VMov->uses()) {
      if (U->getOpcode() != ISD::CopyToReg)
        return false;
      Copies.insert(U);
    }
    if (Copies.size() > 2)
      return false;

    // Find which CopyToReg heads the chain: the one whose chain operand is
    // NOT the other copy.
    for (SDNode *U : VMov->uses()) {
      SDValue UseChain = U->getOperand(0);
      if (Copies.count(UseChain.getNode()))
        // Second CopyToReg
        Copy = U;
      else {
        // We are at the top of this chain.
        // If the copy has a glue operand, we conservatively assume it
        // isn't safe to perform a tail call.
        if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
          return false;
        // First CopyToReg
        TCChain = UseChain;
      }
    }
  } else if (Copy->getOpcode() == ISD::BITCAST) {
    // f32 returned in a single GPR.
    if (!Copy->hasOneUse())
      return false;
    Copy = *Copy->use_begin();
    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
      return false;
    // If the copy has a glue operand, we conservatively assume it isn't safe to
    // perform a tail call.
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else {
    return false;
  }

  // The final copy must feed only return nodes (normal or interrupt return).
  bool HasRet = false;
  for (const SDNode *U : Copy->uses()) {
    if (U->getOpcode() != ARMISD::RET_FLAG &&
        U->getOpcode() != ARMISD::INTRET_FLAG)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = TCChain;
  return true;
}
3406 
3407 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3408  if (!Subtarget->supportsTailCall())
3409  return false;
3410 
3411  if (!CI->isTailCall())
3412  return false;
3413 
3414  return true;
3415 }
3416 
// Trying to write a 64 bit value so need to split into two 32 bit values first,
// and pass the lower and high parts through.
  SDLoc DL(Op);
  SDValue WriteValue = Op->getOperand(2);

  // This function is only supposed to be called for i64 type argument.
  assert(WriteValue.getValueType() == MVT::i64
          && "LowerWRITE_REGISTER called for non-i64 type argument.");

  // Split the i64 payload into its two constituent i32 halves.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
                           DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
                           DAG.getConstant(1, DL, MVT::i32));
  // Rebuild WRITE_REGISTER as (chain, register operand, lo, hi).
  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
}
3434 
3435 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3436 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3437 // one of the above mentioned nodes. It has to be wrapped because otherwise
3438 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3439 // be used to form addressing mode. These wrapped nodes will be selected
3440 // into MOVi.
3441 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3442  SelectionDAG &DAG) const {
3443  EVT PtrVT = Op.getValueType();
3444  // FIXME there is no actual debug info here
3445  SDLoc dl(Op);
3446  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3447  SDValue Res;
3448 
3449  // When generating execute-only code Constant Pools must be promoted to the
3450  // global data section. It's a bit ugly that we can't share them across basic
3451  // blocks, but this way we guarantee that execute-only behaves correct with
3452  // position-independent addressing modes.
3453  if (Subtarget->genExecuteOnly()) {
3454  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3455  auto T = const_cast<Type*>(CP->getType());
3456  auto C = const_cast<Constant*>(CP->getConstVal());
3457  auto M = const_cast<Module*>(DAG.getMachineFunction().
3458  getFunction().getParent());
3459  auto GV = new GlobalVariable(
3460  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3461  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3462  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3463  Twine(AFI->createPICLabelUId())
3464  );
3465  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3466  dl, PtrVT);
3467  return LowerGlobalAddress(GA, DAG);
3468  }
3469 
3470  // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3471  // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3472  Align CPAlign = CP->getAlign();
3473  if (Subtarget->isThumb1Only())
3474  CPAlign = std::max(CPAlign, Align(4));
3475  if (CP->isMachineConstantPoolEntry())
3476  Res =
3477  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3478  else
3479  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3480  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3481 }
3482 
3485 }
3486 
/// Lower a BlockAddress node by materializing its address from the constant
/// pool; PIC/ROPI code additionally needs a pc-relative fixup (PIC_ADD).
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ARMPCLabelIndex = 0;
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDValue CPAddr;
  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
  // Absolute code can place the raw block address in the constant pool;
  // position-independent code must use a pc-relative entry with a unique
  // PIC label plus a PC adjustment (8 bytes ahead in ARM mode, 4 in Thumb).
  if (!IsPositionIndependent) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
  } else {
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
                                      ARMCP::CPBlockAddress, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(
      PtrVT, DL, DAG.getEntryNode(), CPAddr,
  if (!IsPositionIndependent)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
3516 
3517 /// Convert a TLS address reference into the correct sequence of loads
3518 /// and calls to compute the variable's address for Darwin, and return an
3519 /// SDValue containing the final node.
3520 
3521 /// Darwin only has one TLS scheme which must be capable of dealing with the
3522 /// fully general situation, in the worst case. This means:
3523 /// + "extern __thread" declaration.
3524 /// + Defined in a possibly unknown dynamic library.
3525 ///
3526 /// The general system is that each __thread variable has a [3 x i32] descriptor
3527 /// which contains information used by the runtime to calculate the address. The
3528 /// only part of this the compiler needs to know about is the first word, which
3529 /// contains a function pointer that must be called with the address of the
3530 /// entire descriptor in "r0".
3531 ///
3532 /// Since this descriptor may be in a different unit, in general access must
3533 /// proceed along the usual ARM rules. A common sequence to produce is:
3534 ///
3535 /// movw rT1, :lower16:_var$non_lazy_ptr
3536 /// movt rT1, :upper16:_var$non_lazy_ptr
3537 /// ldr r0, [rT1]
3538 /// ldr rT2, [r0]
3539 /// blx rT2
3540 /// [...address now in r0...]
SDValue
ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin() &&
         "This function expects a Darwin target");
  SDLoc DL(Op);

  // First step is to get the address of the actual global symbol. This is
  // where the TLS descriptor lives.
  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);

  // The first entry in the descriptor is a function pointer that we must call
  // to obtain the address of the variable.
  SDValue Chain = DAG.getEntryNode();
  SDValue FuncTLVGet = DAG.getLoad(
      MVT::i32, DL, Chain, DescAddr,
  Chain = FuncTLVGet.getValue(1);

  MachineFrameInfo &MFI = F.getFrameInfo();
  // We emit a call below without going through normal call lowering, so flag
  // the stack adjustment explicitly.
  MFI.setAdjustsStack(true);

  // TLS calls preserve all registers except those that absolutely must be
  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
  // silly).
  auto TRI =
      getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);

  // Finally, we can make the call. This is just a degenerate version of a
  // normal ARM call node: r0 takes the address of the descriptor, and
  // returns the address of the variable in this thread.
  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
  Chain =
      Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
      DAG.getRegisterMask(Mask), Chain.getValue(1));
  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
}
3584 
/// Lower a TLS global address for Windows on ARM: walk from the TEB to the
/// per-module TLS block via the runtime's _tls_index, then add the variable's
/// section-relative (SECREL) offset.
SDValue
ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");

  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);

  // Load the current TEB (thread environment block) via
  // "mrc p15, #0, <Rt>, c13, c0, #2" (the CP15 user thread-ID register,
  // which holds the TEB pointer on Windows).
  SDValue Ops[] = {Chain,
                   DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
                   DAG.getTargetConstant(15, DL, MVT::i32),
                   DAG.getTargetConstant(0, DL, MVT::i32),
                   DAG.getTargetConstant(13, DL, MVT::i32),
                   DAG.getTargetConstant(0, DL, MVT::i32),
                   DAG.getTargetConstant(2, DL, MVT::i32)};
  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
                                   DAG.getVTList(MVT::i32, MVT::Other), Ops);

  SDValue TEB = CurrentTEB.getValue(0);
  Chain = CurrentTEB.getValue(1);

  // Load the ThreadLocalStoragePointer from the TEB
  // A pointer to the TLS array is located at offset 0x2c from the TEB.
  SDValue TLSArray =
      DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());

  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
  // offset into the TLSArray.

  // Load the TLS index from the C runtime
  SDValue TLSIndex =
      DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());

  // TLS = TLSArray[_tls_index], i.e. the base of this module's TLS data area.
  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
                             DAG.getConstant(2, DL, MVT::i32));
  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
                            DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
                            MachinePointerInfo());

  // Get the offset of the start of the .tls section (section base)
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
  SDValue Offset = DAG.getLoad(
      PtrVT, DL, Chain,
          DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),

  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
}
3640 
// Lower ISD::GlobalTLSAddress using the "general dynamic" model: materialize
// the pc-relative address of a TLSGD constant-pool entry and call
// __tls_get_addr with it; the call's result is the variable's address.
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(GA);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // PC-relative references see the PC 8 bytes ahead in ARM mode, 4 in Thumb.
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV =
    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
  Argument = DAG.getLoad(
      PtrVT, dl, DAG.getEntryNode(), Argument,
  SDValue Chain = Argument.getValue(1);

  // Fix the loaded value up into an absolute address via a PIC add.
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);

  // FIXME: is there useful debug info available here?
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.first;
}
3680 
// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
// Both compute the address as thread-pointer + offset; they differ in how the
// offset is obtained (initial exec loads it at runtime, local exec embeds it
// in the constant pool directly).
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG,
                                        TLSModel::Model model) const {
  const GlobalValue *GV = GA->getGlobal();
  SDLoc dl(GA);
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Get the Thread Pointer

  if (model == TLSModel::InitialExec) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    // Initial exec model.
    // PC-relative references see the PC 8 bytes ahead in ARM mode, 4 in Thumb.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
                                      true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    // The first load produced the address of the offset; now load the offset
    // value itself.
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
  } else {
    // local exec model
    ARMConstantPoolValue *CPV =
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
3734 
/// Top-level TLS address lowering: select the per-platform scheme (emulated
/// TLS, Darwin, Windows) or, for ELF, dispatch on the TLS model chosen for
/// this global.
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // Emulated TLS takes precedence over any native scheme.
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  if (Subtarget->isTargetDarwin())
    return LowerGlobalTLSAddressDarwin(Op, DAG);

  if (Subtarget->isTargetWindows())
    return LowerGlobalTLSAddressWindows(Op, DAG);

  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() && "Only ELF implemented here");

  switch (model) {
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  case TLSModel::InitialExec:
  case TLSModel::LocalExec:
    return LowerToTLSExecModels(GA, DAG, model);
  }
  llvm_unreachable("bogus TLS model");
}
3761 
3762 /// Return true if all users of V are within function F, looking through
3763 /// ConstantExprs.
3764 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3765  SmallVector<const User*,4> Worklist(V->users());
3766  while (!Worklist.empty()) {
3767  auto *U = Worklist.pop_back_val();
3768  if (isa<ConstantExpr>(U)) {
3769  append_range(Worklist, U->users());
3770  continue;
3771  }
3772 
3773  auto *I = dyn_cast<Instruction>(U);
3774  if (!I || I->getParent()->getParent() != F)
3775  return false;
3776  }
3777  return true;
3778 }
3779 
    const GlobalValue *GV, SelectionDAG &DAG,
    EVT PtrVT, const SDLoc &dl) {
  // If we're creating a pool entry for a constant global with unnamed address,
  // and the global is small enough, we can emit it inline into the constant pool
  // to save ourselves an indirection.
  //
  // This is a win if the constant is only used in one function (so it doesn't
  // need to be duplicated) or duplicating the constant wouldn't increase code
  // size (implying the constant is no larger than 4 bytes).
  const Function &F = DAG.getMachineFunction().getFunction();

  // We rely on this decision to inline being idempotent and unrelated to the
  // use-site. We know that if we inline a variable at one use site, we'll
  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
  // doesn't know about this optimization, so bail out if it's enabled else
  // we could decide to inline here (and thus never emit the GV) but require
  // the GV from fast-isel generated code.
  if (!EnableConstpoolPromotion ||
    return SDValue();

  // Only local, constant globals with an initializer and unnamed_addr are
  // candidates for promotion.
  auto *GVar = dyn_cast<GlobalVariable>(GV);
  if (!GVar || !GVar->hasInitializer() ||
      !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
      !GVar->hasLocalLinkage())
    return SDValue();

  // If we inline a value that contains relocations, we move the relocations
  // from .data to .text. This is not allowed in position-independent code.
  auto *Init = GVar->getInitializer();
  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
      Init->needsDynamicRelocation())
    return SDValue();

  // The constant islands pass can only really deal with alignment requests
  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
  // any type wanting greater alignment requirements than 4 bytes. We also
  // can only promote constants that are multiples of 4 bytes in size or
  // are paddable to a multiple of 4. Currently we only try and pad constants
  // that are strings for simplicity.
  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
  Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
  // RequiredPadding == 4 means Size is already a multiple of 4 (no padding).
  unsigned RequiredPadding = 4 - (Size % 4);
  bool PaddingPossible =
    RequiredPadding == 4 || (CDAInit && CDAInit->isString());
  if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
      Size == 0)
    return SDValue();

  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
  MachineFunction &MF = DAG.getMachineFunction();

  // We can't bloat the constant pool too much, else the ConstantIslands pass
  // may fail to converge. If we haven't promoted this global yet (it may have
  // multiple uses), and promoting it would increase the constant pool size (Sz
  // > 4), ensure we have space to do so up to MaxTotal.
  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
    if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
      return SDValue();

  // This is only valid if all users are in a single function; we can't clone
  // the constant in general. The LLVM IR unnamed_addr allows merging
  // constants, but not cloning them.
  //
  // We could potentially allow cloning if we could prove all uses of the
  // constant in the current function don't care about the address, like
  // printf format strings. But that isn't implemented for now.
  if (!allUsersAreInFunction(GVar, &F))
    return SDValue();

  // We're going to inline this global. Pad it out if needed.
  if (RequiredPadding != 4) {
    StringRef S = CDAInit->getAsString();

    SmallVector<uint8_t,16> V(S.size());
    std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
    while (RequiredPadding--)
      V.push_back(0);
    Init = ConstantDataArray::get(*DAG.getContext(), V);
  }

  // Build the constant-pool entry that stands in for the global.
  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
  SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
        PaddedSize - 4);
  }
  ++NumConstpoolPromoted;
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
3875 
  // Look through aliases to the underlying object; an alias with no
  // determinable aliasee is conservatively not considered read-only.
  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
    if (!(GV = GA->getAliaseeObject()))
      return false;
  // Constant global variables and functions are the read-only cases
  // recognized here.
  if (const auto *V = dyn_cast<GlobalVariable>(GV))
    return V->isConstant();
  return isa<Function>(GV);
}
3884 
3885 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3886  SelectionDAG &DAG) const {
3887  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3888  default: llvm_unreachable("unknown object format");
3889  case Triple::COFF:
3890  return LowerGlobalAddressWindows(Op, DAG);
3891  case Triple::ELF:
3892  return LowerGlobalAddressELF(Op, DAG);
3893  case Triple::MachO:
3894  return LowerGlobalAddressDarwin(Op, DAG);
3895  }
3896 }
3897 
// Lower a GlobalAddress node for ELF targets. Strategy, in order:
//  1. If the global is DSO-local and we are not generating execute-only
//     code, try promoting it into the constant pool.
//  2. PIC: wrap in ARMISD::WrapperPIC, loading through the GOT when the
//     global is not DSO-local.
//  3. ROPI and the global is read-only: PC-relative WrapperPIC.
//  4. RWPI and the global is writable: SB-relative address (static base in
//     R9) added to an SBREL offset from movw/movt or a literal pool.
//  5. Otherwise: a movw/movt pair, or a constant-pool load when movt is
//     unavailable.
// NOTE(review): extraction dropped original lines 3919, 3935, 3940 and 3960
// (the MachinePointerInfo operands of the getLoad calls and the
// ARMConstantPoolConstant::Create call) — consult the upstream file.
3898 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3899  SelectionDAG &DAG) const {
3900  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3901  SDLoc dl(Op);
3902  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3903  const TargetMachine &TM = getTargetMachine();
3904  bool IsRO = isReadOnly(GV);
3905 
3906  // promoteToConstantPool only if not generating XO text section
3907  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3908  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3909  return V;
3910 
3911  if (isPositionIndependent()) {
3912  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3913  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3914  UseGOT_PREL ? ARMII::MO_GOT : 0);
3915  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3916  if (UseGOT_PREL)
3917  Result =
3918  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3920  return Result;
3921  } else if (Subtarget->isROPI() && IsRO) {
3922  // PC-relative.
3923  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3924  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3925  return Result;
3926  } else if (Subtarget->isRWPI() && !IsRO) {
3927  // SB-relative.
3928  SDValue RelAddr;
3929  if (Subtarget->useMovt()) {
3930  ++NumMovwMovt;
3931  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3932  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3933  } else { // use literal pool for address constant
3934  ARMConstantPoolValue *CPV =
3936  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3937  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3938  RelAddr = DAG.getLoad(
3939  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3941  }
// R9 is the RWPI static base register; the final address is SB + offset.
3942  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3943  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3944  return Result;
3945  }
3946 
3947  // If we have T2 ops, we can materialize the address directly via movt/movw
3948  // pair. This is always cheaper.
3949  if (Subtarget->useMovt()) {
3950  ++NumMovwMovt;
3951  // FIXME: Once remat is capable of dealing with instructions with register
3952  // operands, expand this into two nodes.
3953  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3954  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3955  } else {
3956  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3957  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3958  return DAG.getLoad(
3959  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3961  }
3962 }
3963 
// Lower a GlobalAddress node for Darwin (Mach-O) targets: materialize the
// address with a wrapper node carrying the MO_NONLAZY flag, then load
// through the non-lazy pointer when the global is accessed indirectly.
// ROPI/RWPI are asserted unsupported on Darwin.
// NOTE(review): extraction dropped original lines 3978 (the expression that
// selects the `Wrapper` opcode — presumably PIC vs. non-PIC wrapper) and
// 3985 (the load's MachinePointerInfo) — consult the upstream file.
3964 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3965  SelectionDAG &DAG) const {
3966  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3967  "ROPI/RWPI not currently supported for Darwin");
3968  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3969  SDLoc dl(Op);
3970  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3971 
3972  if (Subtarget->useMovt())
3973  ++NumMovwMovt;
3974 
3975  // FIXME: Once remat is capable of dealing with instructions with register
3976  // operands, expand this into multiple nodes
3977  unsigned Wrapper =
3979 
3980  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3981  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3982 
3983  if (Subtarget->isGVIndirectSymbol(GV))
3984  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3986  return Result;
3987 }
3988 
// Lower a GlobalAddress node for Windows (COFF) targets. Windows on ARM
// always materializes addresses with a movw/movt pair; dllimport globals
// and non-DSO-local globals carry a stub flag and are then loaded through
// the import-address-table / stub slot.
// NOTE(review): extraction dropped original line 4017 (the load's
// MachinePointerInfo) — consult the upstream file.
3989 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3990  SelectionDAG &DAG) const {
3991  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3992  assert(Subtarget->useMovt() &&
3993  "Windows on ARM expects to use movw/movt");
3994  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3995  "ROPI/RWPI not currently supported for Windows");
3996 
3997  const TargetMachine &TM = getTargetMachine();
3998  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3999  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4000  if (GV->hasDLLImportStorageClass())
4001  TargetFlags = ARMII::MO_DLLIMPORT;
4002  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
4003  TargetFlags = ARMII::MO_COFFSTUB;
4004  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4005  SDValue Result;
4006  SDLoc DL(Op);
4007 
4008  ++NumMovwMovt;
4009 
4010  // FIXME: Once remat is capable of dealing with instructions with register
4011  // operands, expand this into two nodes.
4012  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4013  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4014  TargetFlags));
4015  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4016  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4018  return Result;
4019 }
4020 
4021 SDValue
4022 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4023  SDLoc dl(Op);
4024  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4025  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4026  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4027  Op.getOperand(1), Val);
4028 }
4029 
4030 SDValue
4031 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4032  SDLoc dl(Op);
4033  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4034  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4035 }
4036 
// Lower llvm.eh.sjlj.setup.dispatch: forwards the incoming chain
// (operand 0) into a target node.
// NOTE(review): extraction dropped original line 4040 (the head of the
// DAG.getNode call — presumably ARMISD::EH_SJLJ_SETUP_DISPATCH with
// MVT::Other) — consult the upstream file.
4037 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4038  SelectionDAG &DAG) const {
4039  SDLoc dl(Op);
4041  Op.getOperand(0));
4042 }
4043 
// Custom-lower void intrinsics. Only Intrinsic::arm_gnu_eabi_mcount is
// handled: it is expanded into a call to "\01__gnu_mcount_nc" using the
// BL_PUSHLR / tBL_PUSHLR pseudo-instructions, which also push the return
// address (LR is marked as an implicit live-in and copied out first).
// NOTE(review): extraction dropped original lines 4061 (the
// call-preserved-mask lookup through ARI) and 4070 (the RegisterMask
// SDValue built from that mask) — consult the upstream file.
4044 SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4045  SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
// The intrinsic ID is operand 0, or operand 1 when operand 0 is a chain.
4046  unsigned IntNo =
4047  cast<ConstantSDNode>(
4048  Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
4049  ->getZExtValue();
4050  switch (IntNo) {
4051  default:
4052  return SDValue(); // Don't custom lower most intrinsics.
4053  case Intrinsic::arm_gnu_eabi_mcount: {
4054  MachineFunction &MF = DAG.getMachineFunction();
4055  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4056  SDLoc dl(Op);
4057  SDValue Chain = Op.getOperand(0);
4058  // call "\01__gnu_mcount_nc"
4059  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4060  const uint32_t *Mask =
4062  assert(Mask && "Missing call preserved mask for calling convention");
4063  // Mark LR an implicit live-in.
4064  Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4065  SDValue ReturnAddress =
4066  DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4067  constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4068  SDValue Callee =
4069  DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
// Thumb needs the predicate operands (ARMCC::AL + no-register) in the call.
4071  if (Subtarget->isThumb())
4072  return SDValue(
4073  DAG.getMachineNode(
4074  ARM::tBL_PUSHLR, dl, ResultTys,
4075  {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4076  DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4077  0);
4078  return SDValue(
4079  DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4080  {ReturnAddress, Callee, RegisterMask, Chain}),
4081  0);
4082  }
4083  }
4084 }
4085 
// Custom-lower chainless intrinsics: the thread pointer, arm_cls/arm_cls64
// (count-leading-sign-bits built from shift/xor/ctlz), the SJLJ LSDA
// address, and a set of NEON/MVE intrinsics that map directly onto generic
// or ARM-specific ISD nodes (vabs, vmull, vmin/vmax variants, vtbl,
// predicate casts, and long shifts).
// NOTE(review): extraction dropped original lines 4142, 4155, 4169, 4176
// and 4199 (the AFI initialization, a MachinePointerInfo operand, and the
// opcode selections of several `NewOpc` ternaries) — consult upstream.
4086 SDValue
4087 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4088  const ARMSubtarget *Subtarget) const {
4089  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4090  SDLoc dl(Op);
4091  switch (IntNo) {
4092  default: return SDValue(); // Don't custom lower most intrinsics.
4093  case Intrinsic::thread_pointer: {
4094  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4095  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4096  }
4097  case Intrinsic::arm_cls: {
// cls(x) computed as ctlz((((x >> 31) ^ x) << 1) | 1): fold the sign bit
// away, then count leading zeros of the value with a guaranteed set bit.
4098  const SDValue &Operand = Op.getOperand(1);
4099  const EVT VTy = Op.getValueType();
4100  SDValue SRA =
4101  DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4102  SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4103  SDValue SHL =
4104  DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4105  SDValue OR =
4106  DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4107  SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4108  return Result;
4109  }
4110  case Intrinsic::arm_cls64: {
4111  // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4112  // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4113  const SDValue &Operand = Op.getOperand(1);
4114  const EVT VTy = Op.getValueType();
4115 
4116  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
4117  DAG.getConstant(1, dl, VTy));
4118  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
4119  DAG.getConstant(0, dl, VTy));
4120  SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4121  SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4122  SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4123  SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4124  SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4125  SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4126  SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4127  SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4128  SDValue CheckLo =
4129  DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4130  SDValue HiIsZero =
4131  DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4132  SDValue AdjustedLo =
4133  DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4134  SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4135  SDValue Result =
4136  DAG.getSelect(dl, VTy, CheckLo,
4137  DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4138  return Result;
4139  }
4140  case Intrinsic::eh_sjlj_lsda: {
// Materialize the LSDA (language-specific data area) address through the
// constant pool, with a PIC label adjustment when position-independent.
4141  MachineFunction &MF = DAG.getMachineFunction();
4143  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4144  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4145  SDValue CPAddr;
4146  bool IsPositionIndependent = isPositionIndependent();
4147  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4148  ARMConstantPoolValue *CPV =
4149  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4150  ARMCP::CPLSDA, PCAdj);
4151  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4152  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4153  SDValue Result = DAG.getLoad(
4154  PtrVT, dl, DAG.getEntryNode(), CPAddr,
4156 
4157  if (IsPositionIndependent) {
4158  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4159  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4160  }
4161  return Result;
4162  }
4163  case Intrinsic::arm_neon_vabs:
4164  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4165  Op.getOperand(1));
4166  case Intrinsic::arm_neon_vmulls:
4167  case Intrinsic::arm_neon_vmullu: {
4168  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4170  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4171  Op.getOperand(1), Op.getOperand(2));
4172  }
4173  case Intrinsic::arm_neon_vminnm:
4174  case Intrinsic::arm_neon_vmaxnm: {
4175  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4177  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4178  Op.getOperand(1), Op.getOperand(2));
4179  }
4180  case Intrinsic::arm_neon_vminu:
4181  case Intrinsic::arm_neon_vmaxu: {
4182  if (Op.getValueType().isFloatingPoint())
4183  return SDValue();
4184  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4185  ? ISD::UMIN : ISD::UMAX;
4186  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4187  Op.getOperand(1), Op.getOperand(2));
4188  }
4189  case Intrinsic::arm_neon_vmins:
4190  case Intrinsic::arm_neon_vmaxs: {
4191  // v{min,max}s is overloaded between signed integers and floats.
4192  if (!Op.getValueType().isFloatingPoint()) {
4193  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4194  ? ISD::SMIN : ISD::SMAX;
4195  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4196  Op.getOperand(1), Op.getOperand(2));
4197  }
4198  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4200  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4201  Op.getOperand(1), Op.getOperand(2));
4202  }
4203  case Intrinsic::arm_neon_vtbl1:
4204  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4205  Op.getOperand(1), Op.getOperand(2));
4206  case Intrinsic::arm_neon_vtbl2:
4207  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4208  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4209  case Intrinsic::arm_mve_pred_i2v:
4210  case Intrinsic::arm_mve_pred_v2i:
4211  return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4212  Op.getOperand(1));
4213  case Intrinsic::arm_mve_vreinterpretq:
4214  return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4215  Op.getOperand(1));
4216  case Intrinsic::arm_mve_lsll:
4217  return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4218  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4219  case Intrinsic::arm_mve_asrl:
4220  return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4221  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4222  }
4223 }
4224 
// Body of the static LowerATOMIC_FENCE helper (its signature on original
// line 4225 was lost in extraction). Single-thread fences lower to nothing;
// CPUs without DMB fall back to the MCR-based barrier; otherwise an arm_dmb
// intrinsic is emitted with a barrier domain chosen from the subtarget and
// the atomic ordering.
// NOTE(review): extraction also dropped original lines 4245 and 4254, which
// declare and reassign the barrier `Domain` variable (presumably
// ARM_MB::ISH default and ARM_MB::ISHST for the Swift release case) —
// consult the upstream file.
4226  const ARMSubtarget *Subtarget) {
4227  SDLoc dl(Op);
4228  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
4229  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
// Single-threaded fences need no barrier instruction at all.
4230  if (SSID == SyncScope::SingleThread)
4231  return Op;
4232 
4233  if (!Subtarget->hasDataBarrier()) {
4234  // Some ARMv6 cpus can support data barriers with an mcr instruction.
4235  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4236  // here.
4237  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4238  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4239  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4240  DAG.getConstant(0, dl, MVT::i32));
4241  }
4242 
4243  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
4244  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
4246  if (Subtarget->isMClass()) {
4247  // Only a full system barrier exists in the M-class architectures.
4248  Domain = ARM_MB::SY;
4249  } else if (Subtarget->preferISHSTBarriers() &&
4250  Ord == AtomicOrdering::Release) {
4251  // Swift happens to implement ISHST barriers in a way that's compatible with
4252  // Release semantics but weaker than ISH so we'd be fools not to use
4253  // it. Beware: other processors probably don't!
4255  }
4256 
4257  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4258  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4259  DAG.getConstant(Domain, dl, MVT::i32));
4260 }
4261 
// Body of the static LowerPREFETCH helper (its signature on original line
// 4262 was lost in extraction). Drops the prefetch — returning just the
// chain — when the subtarget lacks PLD support (pre-v5TE / Thumb1) or when
// a write prefetch is requested without v7+MP (no PLDW); otherwise emits an
// ARMISD::PRELOAD node. On Thumb the isRead/isData bits are encoded
// inverted relative to ARM mode.
4263  const ARMSubtarget *Subtarget) {
4264  // ARM pre v5TE and Thumb1 does not have preload instructions.
4265  if (!(Subtarget->isThumb2() ||
4266  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4267  // Just preserve the chain.
4268  return Op.getOperand(0);
4269 
4270  SDLoc dl(Op);
4271  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
4272  if (!isRead &&
4273  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4274  // ARMv7 with MP extension has PLDW.
4275  return Op.getOperand(0);
4276 
4277  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4278  if (Subtarget->isThumb()) {
4279  // Invert the bits.
4280  isRead = ~isRead & 1;
4281  isData = ~isData & 1;
4282  }
4283 
4284  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4285  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4286  DAG.getConstant(isData, dl, MVT::i32));
4287 }
4288 
// Body of the static LowerVASTART helper (its signature on original line
// 4289 was lost in extraction): va_start simply stores the address of the
// VarArgsFrameIndex slot into the va_list memory operand.
4290  MachineFunction &MF = DAG.getMachineFunction();
4291  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4292 
4293  // vastart just stores the address of the VarArgsFrameIndex slot into the
4294  // memory location argument.
4295  SDLoc dl(Op);
4296  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4297  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4298  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4299  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4300  MachinePointerInfo(SV));
4301 }
4302 
// Reassemble an f64 formal argument from two 32-bit locations — either two
// GPRs, or one GPR plus a stack slot — and combine them with
// ARMISD::VMOVDRR, swapping the halves on big-endian targets.
// NOTE(review): extraction dropped original lines 4309 (the AFI
// initialization used by the register-class choice) and 4330 (the load's
// MachinePointerInfo) — consult the upstream file.
4303 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4304  CCValAssign &NextVA,
4305  SDValue &Root,
4306  SelectionDAG &DAG,
4307  const SDLoc &dl) const {
4308  MachineFunction &MF = DAG.getMachineFunction();
4310 
4311  const TargetRegisterClass *RC;
4312  if (AFI->isThumb1OnlyFunction())
4313  RC = &ARM::tGPRRegClass;
4314  else
4315  RC = &ARM::GPRRegClass;
4316 
4317  // Transform the arguments stored in physical registers into virtual ones.
4318  Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4319  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4320 
4321  SDValue ArgValue2;
4322  if (NextVA.isMemLoc()) {
4323  MachineFrameInfo &MFI = MF.getFrameInfo();
4324  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4325 
4326  // Create load node to retrieve arguments from the stack.
4327  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4328  ArgValue2 = DAG.getLoad(
4329  MVT::i32, dl, Root, FIN,
4331  } else {
4332  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4333  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4334  }
// Big-endian: the high/low halves arrive in the opposite order.
4335  if (!Subtarget->isLittle())
4336  std::swap (ArgValue, ArgValue2);
4337  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4338 }
4339 
4340 // The remaining GPRs hold either the beginning of variable-argument
4341 // data, or the beginning of an aggregate passed by value (usually
4342 // byval). Either way, we allocate stack slots adjacent to the data
4343 // provided by our caller, and store the unallocated registers there.
4344 // If this is a variadic function, the va_list pointer will begin with
4345 // these values; otherwise, this reassembles a (byval) structure that
4346 // was split between registers and memory.
4347 // Return: The frame index registers were stored into.
// NOTE(review): extraction dropped original line 4366 (the AFI
// initialization used for the Thumb1 register-class choice) — consult the
// upstream file. See the comment block above for the function's contract.
4348 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4349  const SDLoc &dl, SDValue &Chain,
4350  const Value *OrigArg,
4351  unsigned InRegsParamRecordIdx,
4352  int ArgOffset, unsigned ArgSize) const {
4353  // Currently, two use-cases possible:
4354  // Case #1. Non-var-args function, and we meet first byval parameter.
4355  // Setup first unallocated register as first byval register;
4356  // eat all remained registers
4357  // (these two actions are performed by HandleByVal method).
4358  // Then, here, we initialize stack frame with
4359  // "store-reg" instructions.
4360  // Case #2. Var-args function, that doesn't contain byval parameters.
4361  // The same: eat all remained unallocated registers,
4362  // initialize stack frame.
4363 
4364  MachineFunction &MF = DAG.getMachineFunction();
4365  MachineFrameInfo &MFI = MF.getFrameInfo();
// Determine the [RBegin, REnd) range of GPRs to spill: either the recorded
// byval register range, or all registers from the first unallocated one up
// to R4 for the varargs case.
4367  unsigned RBegin, REnd;
4368  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4369  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4370  } else {
4371  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4372  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4373  REnd = ARM::R4;
4374  }
4375 
// Registers are saved just below the CFA: 4 bytes per register to spill.
4376  if (REnd != RBegin)
4377  ArgOffset = -4 * (ARM::R4 - RBegin);
4378 
4379  auto PtrVT = getPointerTy(DAG.getDataLayout());
4380  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4381  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4382 
4383  SmallVector<SDValue, 4> MemOps;
4384  const TargetRegisterClass *RC =
4385  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4386 
// Copy each physical register into a vreg and store it to consecutive
// 4-byte slots of the frame object.
4387  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4388  Register VReg = MF.addLiveIn(Reg, RC);
4389  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4390  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4391  MachinePointerInfo(OrigArg, 4 * i));
4392  MemOps.push_back(Store);
4393  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4394  }
4395 
4396  if (!MemOps.empty())
4397  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4398  return FrameIndex;
4399 }
4400 
4401 // Setup stack frame, the va_list pointer will start from.
// NOTE(review): extraction dropped original lines 4408 (the AFI
// initialization) and 4419 (presumably AFI->setVarArgsFrameIndex(FrameIndex))
// — consult the upstream file.
4402 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4403  const SDLoc &dl, SDValue &Chain,
4404  unsigned ArgOffset,
4405  unsigned TotalArgRegsSaveSize,
4406  bool ForceMutable) const {
4407  MachineFunction &MF = DAG.getMachineFunction();
4409 
4410  // Try to store any remaining integer argument regs
4411  // to their spots on the stack so that they may be loaded by dereferencing
4412  // the result of va_next.
4413  // If there is no regs to be stored, just point address after last
4414  // argument passed via stack.
4415  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
4416  CCInfo.getInRegsParamsCount(),
4417  CCInfo.getNextStackOffset(),
4418  std::max(4U, TotalArgRegsSaveSize));
4420 }
4421 
4422 bool ARMTargetLowering::splitValueIntoRegisterParts(
4423  SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4424  unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4425  bool IsABIRegCopy = CC.has_value();
4426  EVT ValueVT = Val.getValueType();
4427  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4428  PartVT == MVT::f32) {
4429  unsigned ValueBits = ValueVT.getSizeInBits();
4430  unsigned PartBits = PartVT.getSizeInBits();
4431  Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4432  Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4433  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4434  Parts[0] = Val;
4435  return true;
4436  }
4437  return false;
4438 }
4439 
4440 SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4441  SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4442  MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4443  bool IsABIRegCopy = CC.has_value();
4444  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4445  PartVT == MVT::f32) {
4446  unsigned ValueBits = ValueVT.getSizeInBits();
4447  unsigned PartBits = PartVT.getSizeInBits();
4448  SDValue Val = Parts[0];
4449 
4450  Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4451  Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4452  Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4453  return Val;
4454  }
4455  return SDValue();
4456 }
4457 
// Lower incoming formal arguments: assign locations via the calling
// convention, copy register arguments into virtual registers (with custom
// handling for f64/v2f64 register splits and f16/bf16 promotion), create
// fixed stack objects and loads for stack-passed arguments and byval
// aggregates, and set up the varargs register-save area. Also emits CMSE
// diagnostics (secure entry functions may be neither variadic nor take
// stack arguments) and records the argument stack size for tail calls.
// NOTE(review): extraction dropped several original lines (4465, 4468,
// 4475, 4542, 4647-4648 partially, 4660-4661, 4680-4681: the AFI/ArgLocs/
// CurOrigArg declarations, a MachinePointerInfo, and the
// DiagnosticInfoUnsupported constructions) — consult the upstream file.
4458 SDValue ARMTargetLowering::LowerFormalArguments(
4459  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4460  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4461  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4462  MachineFunction &MF = DAG.getMachineFunction();
4463  MachineFrameInfo &MFI = MF.getFrameInfo();
4464 
4466 
4467  // Assign locations to all of the incoming arguments.
4469  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4470  *DAG.getContext());
4471  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4472 
4473  SmallVector<SDValue, 16> ArgValues;
4474  SDValue ArgValue;
4476  unsigned CurArgIdx = 0;
4477 
4478  // Initially ArgRegsSaveSize is zero.
4479  // Then we increase this value each time we meet byval parameter.
4480  // We also increase this value in case of varargs function.
4481  AFI->setArgRegsSaveSize(0);
4482 
4483  // Calculate the amount of stack space that we need to allocate to store
4484  // byval and variadic arguments that are passed in registers.
4485  // We need to know this before we allocate the first byval or variadic
4486  // argument, as they will be allocated a stack slot below the CFA (Canonical
4487  // Frame Address, the stack pointer at entry to the function).
4488  unsigned ArgRegBegin = ARM::R4;
4489  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4490  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4491  break;
4492 
4493  CCValAssign &VA = ArgLocs[i];
4494  unsigned Index = VA.getValNo();
4495  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4496  if (!Flags.isByVal())
4497  continue;
4498 
4499  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4500  unsigned RBegin, REnd;
4501  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4502  ArgRegBegin = std::min(ArgRegBegin, RBegin);
4503 
4504  CCInfo.nextInRegsParam();
4505  }
4506  CCInfo.rewindByValRegsInfo();
4507 
4508  int lastInsIndex = -1;
4509  if (isVarArg && MFI.hasVAStart()) {
4510  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4511  if (RegIdx != std::size(GPRArgRegs))
4512  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4513  }
4514 
4515  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4516  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4517  auto PtrVT = getPointerTy(DAG.getDataLayout());
4518 
// Main loop: materialize each assigned location as an SDValue in InVals.
4519  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4520  CCValAssign &VA = ArgLocs[i];
4521  if (Ins[VA.getValNo()].isOrigArg()) {
4522  std::advance(CurOrigArg,
4523  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4524  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4525  }
4526  // Arguments stored in registers.
4527  if (VA.isRegLoc()) {
4528  EVT RegVT = VA.getLocVT();
4529 
4530  if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4531  // f64 and vector types are split up into multiple registers or
4532  // combinations of registers and stack slots.
4533  SDValue ArgValue1 =
4534  GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4535  VA = ArgLocs[++i]; // skip ahead to next loc
4536  SDValue ArgValue2;
4537  if (VA.isMemLoc()) {
4538  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4539  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4540  ArgValue2 = DAG.getLoad(
4541  MVT::f64, dl, Chain, FIN,
4543  } else {
4544  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4545  }
4546  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4547  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4548  ArgValue1, DAG.getIntPtrConstant(0, dl));
4549  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4550  ArgValue2, DAG.getIntPtrConstant(1, dl));
4551  } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4552  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4553  } else {
4554  const TargetRegisterClass *RC;
4555 
4556  if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4557  RC = &ARM::HPRRegClass;
4558  else if (RegVT == MVT::f32)
4559  RC = &ARM::SPRRegClass;
4560  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4561  RegVT == MVT::v4bf16)
4562  RC = &ARM::DPRRegClass;
4563  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4564  RegVT == MVT::v8bf16)
4565  RC = &ARM::QPRRegClass;
4566  else if (RegVT == MVT::i32)
4567  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4568  : &ARM::GPRRegClass;
4569  else
4570  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4571 
4572  // Transform the arguments in physical registers into virtual ones.
4573  Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4574  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4575 
4576  // If this value is passed in r0 and has the returned attribute (e.g.
4577  // C++ 'structors), record this fact for later use.
4578  if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4579  AFI->setPreservesR0();
4580  }
4581  }
4582 
4583  // If this is an 8 or 16-bit value, it is really passed promoted
4584  // to 32 bits. Insert an assert[sz]ext to capture this, then
4585  // truncate to the right size.
4586  switch (VA.getLocInfo()) {
4587  default: llvm_unreachable("Unknown loc info!");
4588  case CCValAssign::Full: break;
4589  case CCValAssign::BCvt:
4590  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4591  break;
4592  case CCValAssign::SExt:
4593  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4594  DAG.getValueType(VA.getValVT()));
4595  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4596  break;
4597  case CCValAssign::ZExt:
4598  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4599  DAG.getValueType(VA.getValVT()));
4600  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4601  break;
4602  }
4603 
4604  // f16 arguments have their size extended to 4 bytes and passed as if they
4605  // had been copied to the LSBs of a 32-bit register.
4606  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4607  if (VA.needsCustom() &&
4608  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4609  ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4610 
4611  InVals.push_back(ArgValue);
4612  } else { // VA.isRegLoc()
4613  // Only arguments passed on the stack should make it here.
4614  assert(VA.isMemLoc());
4615  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4616 
4617  int index = VA.getValNo();
4618 
4619  // Some Ins[] entries become multiple ArgLoc[] entries.
4620  // Process them only once.
4621  if (index != lastInsIndex)
4622  {
4623  ISD::ArgFlagsTy Flags = Ins[index].Flags;
4624  // FIXME: For now, all byval parameter objects are marked mutable.
4625  // This can be changed with more analysis.
4626  // In case of tail call optimization mark all arguments mutable.
4627  // Since they could be overwritten by lowering of arguments in case of
4628  // a tail call.
4629  if (Flags.isByVal()) {
4630  assert(Ins[index].isOrigArg() &&
4631  "Byval arguments cannot be implicit");
4632  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4633 
4634  int FrameIndex = StoreByValRegs(
4635  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4636  VA.getLocMemOffset(), Flags.getByValSize());
4637  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4638  CCInfo.nextInRegsParam();
4639  } else {
4640  unsigned FIOffset = VA.getLocMemOffset();
4641  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4642  FIOffset, true);
4643 
4644  // Create load nodes to retrieve arguments from the stack.
4645  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4646  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4648  DAG.getMachineFunction(), FI)));
4649  }
4650  lastInsIndex = index;
4651  }
4652  }
4653  }
4654 
4655  // varargs
4656  if (isVarArg && MFI.hasVAStart()) {
4657  VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
4658  TotalArgRegsSaveSize);
4659  if (AFI->isCmseNSEntryFunction()) {
4662  "secure entry function must not be variadic", dl.getDebugLoc());
4663  DAG.getContext()->diagnose(Diag);
4664  }
4665  }
4666 
4667  unsigned StackArgSize = CCInfo.getNextStackOffset();
4668  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4669  if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4670  // The only way to guarantee a tail call is if the callee restores its
4671  // argument area, but it must also keep the stack aligned when doing so.
4672  const DataLayout &DL = DAG.getDataLayout();
4673  StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4674 
4675  AFI->setArgumentStackToRestore(StackArgSize);
4676  }
4677  AFI->setArgumentStackSize(StackArgSize);
4678 
4679  if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
4682  "secure entry function requires arguments on stack", dl.getDebugLoc());
4683  DAG.getContext()->diagnose(Diag);
4684  }
4685 
4686  return Chain;
4687 }
4688 
4689 /// isFloatingPointZero - Return true if this is +0.0.
// NOTE(review): the signature line (original line 4690, presumably
// `static bool isFloatingPointZero(SDValue Op)`) was lost in extraction.
// Recognizes +0.0 in three forms: a ConstantFP node, a load of a
// constant-pool entry holding +0.0, and the f64 bitcast-of-VMOVIMM(0)
// pattern produced by LowerConstantFP.
4691  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4692  return CFP->getValueAPF().isPosZero();
4693  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4694  // Maybe this has already been legalized into the constant pool?
4695  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4696  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4697  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4698  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4699  return CFP->getValueAPF().isPosZero();
4700  }
4701  } else if (Op->getOpcode() == ISD::BITCAST &&
4702  Op->getValueType(0) == MVT::f64) {
4703  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4704  // created by LowerConstantFP().
4705  SDValue BitcastOp = Op->getOperand(0);
4706  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4707  isNullConstant(BitcastOp->getOperand(0)))
4708  return true;
4709  }
4710  return false;
4711 }
4712 
4713 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4714 /// the given operands.
4715 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4716  SDValue &ARMcc, SelectionDAG &DAG,
4717  const SDLoc &dl) const {
4718  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4719  unsigned C = RHSC->getZExtValue();
4720  if (!isLegalICmpImmediate((int32_t)C)) {
4721  // Constant does not fit, try adjusting it by one.
4722  switch (CC) {
4723  default: break;
4724  case ISD::SETLT:
4725  case ISD::SETGE:
4726  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4727  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4728  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4729  }
4730  break;
4731  case ISD::SETULT:
4732  case ISD::SETUGE:
4733  if (C != 0 && isLegalICmpImmediate(C-1)) {
4735  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4736  }
4737  break;
4738  case ISD::SETLE:
4739  case ISD::SETGT:
4740  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4741  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4742  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4743  }
4744  break;
4745  case ISD::SETULE:
4746  case ISD::SETUGT:
4747  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4749  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4750  }
4751  break;
4752  }
4753  }
4754  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4755  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4756  // In ARM and Thumb-2, the compare instructions can shift their second
4757  // operand.
4759  std::swap(LHS, RHS);
4760  }
4761 
4762  // Thumb1 has very limited immediate modes, so turning an "and" into a
4763  // shift can save multiple instructions.
4764  //
4765  // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4766  // into "((x << n) >> n)". But that isn't necessarily profitable on its
4767  // own. If it's the operand to an unsigned comparison with an immediate,
4768  // we can eliminate one of the shifts: we transform
4769  // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4770  //
4771  // We avoid transforming cases which aren't profitable due to encoding
4772  // details:
4773  //
4774  // 1. C2 fits into the immediate field of a cmp, and the transformed version
4775  // would not; in that case, we're essentially trading one immediate load for
4776  // another.
4777  // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4778  // 3. C2 is zero; we have other code for this special case.
4779  //
4780  // FIXME: Figure out profitability for Thumb2; we usually can't save an
4781  // instruction, since the AND is always one instruction anyway, but we could
4782  // use narrow instructions in some cases.
4783  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4784  LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4785  LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4786  !isSignedIntSetCC(CC)) {
4787  unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
4788  auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4789  uint64_t RHSV = RHSC->getZExtValue();
4790  if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4791  unsigned ShiftBits = llvm::countl_zero(Mask);
4792  if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4793  SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4794  LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4795  RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4796  }
4797  }
4798  }
4799 
4800  // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4801  // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4802  // way a cmp would.
4803  // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4804  // some tweaks to the heuristics for the previous and->shift transform.
4805  // FIXME: Optimize cases where the LHS isn't a shift.
4806  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4807  isa<ConstantSDNode>(RHS) &&
4808  cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
4809  CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4810  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
4811  unsigned ShiftAmt =
4812  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
4813  SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4814  DAG.getVTList(MVT::i32, MVT::i32),
4815  LHS.getOperand(0),
4816  DAG.getConstant(ShiftAmt, dl, MVT::i32));
4817  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4818  Shift.getValue(1), SDValue());
4819  ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4820  return Chain.getValue(1);
4821  }
4822 
4824 
4825  // If the RHS is a constant zero then the V (overflow) flag will never be
4826  // set. This can allow us to simplify GE to PL or LT to MI, which can be
4827  // simpler for other passes (like the peephole optimiser) to deal with.
4828  if (isNullConstant(RHS)) {
4829  switch (CondCode) {
4830  default: break;
4831  case ARMCC::GE:
4832  CondCode = ARMCC::PL;
4833  break;
4834  case ARMCC::LT:
4835  CondCode = ARMCC::MI;
4836  break;
4837  }
4838  }
4839 
4840  ARMISD::NodeType CompareType;
4841  switch (CondCode) {
4842  default:
4843  CompareType = ARMISD::CMP;
4844  break;
4845  case ARMCC::EQ:
4846  case ARMCC::NE:
4847  // Uses only Z Flag
4848  CompareType = ARMISD::CMPZ;
4849  break;
4850  }
4851  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4852  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4853 }
4854 
4855 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4856 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4857  SelectionDAG &DAG, const SDLoc &dl,
4858  bool Signaling) const {
4859  assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4860  SDValue Cmp;
4861  if (!isFloatingPointZero(RHS))
4862  Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4863  dl, MVT::Glue, LHS, RHS);
4864  else
4865  Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4866  dl, MVT::Glue, LHS);
4867  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4868 }
4869 
4870 /// duplicateCmp - Glue values can have only one use, so this function
4871 /// duplicates a comparison node.
4872 SDValue
4873 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4874  unsigned Opc = Cmp.getOpcode();
4875  SDLoc DL(Cmp);
4876  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4877  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4878 
4879  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4880  Cmp = Cmp.getOperand(0);
4881  Opc = Cmp.getOpcode();
4882  if (Opc == ARMISD::CMPFP)
4883  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4884  else {
4885  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4886  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4887  }
4888  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4889 }
4890 
// This function returns three things: the arithmetic computation itself
// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
// comparison and the condition code define the case in which the arithmetic
// computation *does not* overflow.
std::pair<SDValue, SDValue>
ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
                                 SDValue &ARMcc) const {
  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");

  SDValue Value, OverflowCmp;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDLoc dl(Op);

  // FIXME: We are currently always generating CMPs because we don't support
  // generating CMN through the backend. This is not as good as the natural
  // CMP case because it causes a register dependency and cannot be folded
  // later.

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::SADDO:
    // No-overflow condition: V clear (no signed overflow).
    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
    Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
    break;
  case ISD::UADDO:
    // No-overflow condition: HS (carry set, i.e. the sum did not wrap).
    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
    // We use ADDC here to correspond to its use in LowerUnsignedALUO.
    // We do not use it in the USUBO case as Value may not be used.
    Value = DAG.getNode(ARMISD::ADDC, dl,
                        DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
                .getValue(0);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
    break;
  case ISD::SSUBO:
    // No-overflow condition: V clear on the LHS-RHS compare.
    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
    break;
  case ISD::USUBO:
    // No-overflow condition: HS (no borrow on LHS-RHS).
    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
    break;
  case ISD::UMULO:
    // We generate a UMUL_LOHI and then check if the high word is 0.
    ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
    Value = DAG.getNode(ISD::UMUL_LOHI, dl,
                        DAG.getVTList(Op.getValueType(), Op.getValueType()),
                        LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
                              DAG.getConstant(0, dl, MVT::i32));
    Value = Value.getValue(0); // We only want the low 32 bits for the result.
    break;
  case ISD::SMULO:
    // We generate a SMUL_LOHI and then check if all the bits of the high word
    // are the same as the sign bit of the low word.
    ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
    Value = DAG.getNode(ISD::SMUL_LOHI, dl,
                        DAG.getVTList(Op.getValueType(), Op.getValueType()),
                        LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
                              DAG.getNode(ISD::SRA, dl, Op.getValueType(),
                                          Value.getValue(0),
                                          DAG.getConstant(31, dl, MVT::i32)));
    Value = Value.getValue(0); // We only want the low 32 bits for the result.
    break;
  } // switch (...)

  return std::make_pair(Value, OverflowCmp);
}
4964 
4965 SDValue
4966 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4967  // Let legalize expand this if it isn't a legal type yet.
4968  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4969  return SDValue();
4970 
4971  SDValue Value, OverflowCmp;
4972  SDValue ARMcc;
4973  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4974  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4975  SDLoc dl(Op);
4976  // We use 0 and 1 as false and true values.
4977  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4978  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4979  EVT VT = Op.getValueType();
4980 
4981  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4982  ARMcc, CCR, OverflowCmp);
4983 
4984  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4985  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4986 }
4987 
4989  SelectionDAG &DAG) {
4990  SDLoc DL(BoolCarry);
4991  EVT CarryVT = BoolCarry.getValueType();
4992 
4993  // This converts the boolean value carry into the carry flag by doing
4994  // ARMISD::SUBC Carry, 1
4995  SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4996  DAG.getVTList(CarryVT, MVT::i32),
4997  BoolCarry, DAG.getConstant(1, DL, CarryVT));
4998  return Carry.getValue(1);
4999 }
5000 
5002  SelectionDAG &DAG) {
5003  SDLoc DL(Flags);
5004 
5005  // Now convert the carry flag into a boolean carry. We do this
5006  // using ARMISD:ADDE 0, 0, Carry
5007  return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5008  DAG.getConstant(0, DL, MVT::i32),
5009  DAG.getConstant(0, DL, MVT::i32), Flags);
5010 }
5011 
5012 SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5013  SelectionDAG &DAG) const {
5014  // Let legalize expand this if it isn't a legal type yet.
5015  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5016  return SDValue();
5017 
5018  SDValue LHS = Op.getOperand(0);
5019  SDValue RHS = Op.getOperand(1);
5020  SDLoc dl(Op);
5021 
5022  EVT VT = Op.getValueType();
5023  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5024  SDValue Value;
5025  SDValue Overflow;
5026  switch (Op.getOpcode()) {
5027  default:
5028  llvm_unreachable("Unknown overflow instruction!");
5029  case ISD::UADDO:
5030  Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5031  // Convert the carry flag into a boolean value.
5032  Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5033  break;
5034  case ISD::USUBO: {
5035  Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5036  // Convert the carry flag into a boolean value.
5037  Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5038  // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
5039  // value. So compute 1 - C.
5040  Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5041  DAG.getConstant(1, dl, MVT::i32), Overflow);
5042  break;
5043  }
5044  }
5045 
5046  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5047 }
5048 
5050  const ARMSubtarget *Subtarget) {
5051  EVT VT = Op.getValueType();
5052  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
5053  return SDValue();
5054  if (!VT.isSimple())
5055  return SDValue();
5056 
5057  unsigned NewOpcode;
5058  switch (VT.getSimpleVT().SimpleTy) {
5059  default:
5060  return SDValue();
5061  case MVT::i8:
5062  switch (Op->getOpcode()) {
5063  case ISD::UADDSAT:
5064  NewOpcode = ARMISD::UQADD8b;
5065  break;
5066  case ISD::SADDSAT:
5067  NewOpcode = ARMISD::QADD8b;
5068  break;
5069  case ISD::USUBSAT:
5070  NewOpcode = ARMISD::UQSUB8b;
5071  break;
5072  case ISD::SSUBSAT:
5073  NewOpcode = ARMISD::QSUB8b;
5074  break;
5075  }
5076  break;
5077  case MVT::i16:
5078  switch (Op->getOpcode()) {
5079  case ISD::UADDSAT:
5080  NewOpcode = ARMISD::UQADD16b;
5081  break;
5082  case ISD::SADDSAT:
5083  NewOpcode = ARMISD::QADD16b;
5084  break;
5085  case ISD::USUBSAT:
5086  NewOpcode = ARMISD::UQSUB16b;
5087  break;
5088  case ISD::SSUBSAT:
5089  NewOpcode = ARMISD::QSUB16b;
5090  break;
5091  }
5092  break;
5093  }
5094 
5095  SDLoc dl(Op);
5096  SDValue Add =
5097  DAG.getNode(NewOpcode, dl, MVT::i32,
5098  DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5099  DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5100  return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5101 }
5102 
// Lower ISD::SELECT. Handles three cases: a select on the overflow result of
// an ALU-with-overflow node, a select whose condition is itself a 0/1 CMOV
// (folded into a single CMOV), and the generic fallback via SELECT_CC.
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue SelectTrue = Op.getOperand(1);
  SDValue SelectFalse = Op.getOperand(2);
  SDLoc dl(Op);
  unsigned Opc = Cond.getOpcode();

  // Selecting on the overflow bit (result #1) of an add/sub-with-overflow:
  // re-derive the compare and condition and emit a CMOV on it directly.
  if (Cond.getResNo() == 1 &&
      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
       Opc == ISD::USUBO)) {
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
      return SDValue();

    SDValue Value, OverflowCmp;
    SDValue ARMcc;
    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    EVT VT = Op.getValueType();

    return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
                   OverflowCmp, DAG);
  }

  // Convert:
  //
  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
  //
  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *CMOVTrue =
      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
    const ConstantSDNode *CMOVFalse =
      dyn_cast<ConstantSDNode>(Cond.getOperand(1));

    if (CMOVTrue && CMOVFalse) {
      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();

      SDValue True;
      SDValue False;
      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
        True = SelectTrue;
        False = SelectFalse;
      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
        // Inverted boolean: swap which operand the CMOV selects.
        True = SelectFalse;
        False = SelectTrue;
      }

      if (True.getNode() && False.getNode()) {
        EVT VT = Op.getValueType();
        SDValue ARMcc = Cond.getOperand(2);
        SDValue CCR = Cond.getOperand(3);
        // Glue values have a single use, so the inner CMOV's compare must
        // be duplicated for this new consumer.
        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
        assert(True.getValueType() == VT);
        return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
      }
    }
  }

  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
  // undefined bits before doing a full-word comparison with zero.
  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
                     DAG.getConstant(1, dl, Cond.getValueType()));

  return DAG.getSelectCC(dl, Cond,
                         DAG.getConstant(0, dl, Cond.getValueType()),
                         SelectTrue, SelectFalse, ISD::SETNE);
}
5171 
5173  bool &swpCmpOps, bool &swpVselOps) {
5174  // Start by selecting the GE condition code for opcodes that return true for
5175  // 'equality'
5176  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5177  CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5178  CondCode = ARMCC::GE;
5179 
5180  // and GT for opcodes that return false for 'equality'.
5181  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5182  CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5183  CondCode = ARMCC::GT;
5184 
5185  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5186  // to swap the compare operands.
5187  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5188  CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5189  swpCmpOps = true;
5190 
5191  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5192  // If we have an unordered opcode, we need to swap the operands to the VSEL
5193  // instruction (effectively negating the condition).
5194  //
5195  // This also has the effect of swapping which one of 'less' or 'greater'
5196  // returns true, so we also swap the compare operands. It also switches
5197  // whether we return true for 'equality', so we compensate by picking the
5198  // opposite condition code to our original choice.
5199  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5200  CC == ISD::SETUGT) {
5201  swpCmpOps = !swpCmpOps;
5202  swpVselOps = !swpVselOps;
5204  }
5205 
5206  // 'ordered' is 'anything but unordered', so use the VS condition code and
5207  // swap the VSEL operands.
5208  if (CC == ISD::SETO) {
5209  CondCode = ARMCC::VS;
5210  swpVselOps = true;
5211  }
5212 
5213  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5214  // code and swap the VSEL operands. Also do this if we don't care about the
5215  // unordered case.
5216  if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5217  CondCode = ARMCC::EQ;
5218  swpVselOps = true;
5219  }
5220 }
5221 
5222 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5223  SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5224  SDValue Cmp, SelectionDAG &DAG) const {
5225  if (!Subtarget->hasFP64() && VT == MVT::f64) {
5226  FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5228  TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5230 
5231  SDValue TrueLow = TrueVal.getValue(0);
5232  SDValue TrueHigh = TrueVal.getValue(1);
5233  SDValue FalseLow = FalseVal.getValue(0);
5234  SDValue FalseHigh = FalseVal.getValue(1);
5235 
5236  SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5237  ARMcc, CCR, Cmp);
5238  SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5239  ARMcc, CCR, duplicateCmp(Cmp, DAG));
5240 
5241  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5242  } else {
5243  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5244  Cmp);
5245  }
5246 }
5247 
5248 static bool isGTorGE(ISD::CondCode CC) {
5249  return CC == ISD::SETGT || CC == ISD::SETGE;
5250 }
5251 
5252 static bool isLTorLE(ISD::CondCode CC) {
5253  return CC == ISD::SETLT || CC == ISD::SETLE;
5254 }
5255 
5256 // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5257 // All of these conditions (and their <= and >= counterparts) will do:
5258 // x < k ? k : x
5259 // x > k ? x : k
5260 // k < x ? x : k
5261 // k > x ? k : x
5262 static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5263  const SDValue TrueVal, const SDValue FalseVal,
5264  const ISD::CondCode CC, const SDValue K) {
5265  return (isGTorGE(CC) &&
5266  ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5267  (isLTorLE(CC) &&
5268  ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5269 }
5270 
5271 // Check if two chained conditionals could be converted into SSAT or USAT.
5272 //
5273 // SSAT can replace a set of two conditional selectors that bound a number to an
5274 // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5275 //
5276 // x < -k ? -k : (x > k ? k : x)
5277 // x < -k ? -k : (x < k ? x : k)
5278 // x > -k ? (x > k ? k : x) : -k
5279 // x < k ? (x < -k ? -k : x) : k
5280 // etc.
5281 //
5282 // LLVM canonicalizes these to either a min(max()) or a max(min())
5283 // pattern. This function tries to match one of these and will return a SSAT
5284 // node if successful.
5285 //
5286 // USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1
5287 // is a power of 2.
5289  EVT VT = Op.getValueType();
5290  SDValue V1 = Op.getOperand(0);
5291  SDValue K1 = Op.getOperand(1);
5292  SDValue TrueVal1 = Op.getOperand(2);
5293  SDValue FalseVal1 = Op.getOperand(3);
5294  ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5295 
5296  const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5297  if (Op2.getOpcode() != ISD::SELECT_CC)
5298  return SDValue();
5299 
5300  SDValue V2 = Op2.getOperand(0);
5301  SDValue K2 = Op2.getOperand(1);
5302  SDValue TrueVal2 = Op2.getOperand(2);
5303  SDValue FalseVal2 = Op2.getOperand(3);
5304  ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5305 
5306  SDValue V1Tmp = V1;
5307  SDValue V2Tmp = V2;
5308 
5309  // Check that the registers and the constants match a max(min()) or min(max())
5310  // pattern
5311  if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5312  K2 != FalseVal2 ||
5313  !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5314  return SDValue();
5315 
5316  // Check that the constant in the lower-bound check is
5317  // the opposite of the constant in the upper-bound check
5318  // in 1's complement.
5319  if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5320  return SDValue();
5321 
5322  int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5323  int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5324  int64_t PosVal = std::max(Val1, Val2);
5325  int64_t NegVal = std::min(Val1, Val2);
5326 
5327  if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5328  !isPowerOf2_64(PosVal + 1))
5329  return SDValue();
5330 
5331  // Handle the difference between USAT (unsigned) and SSAT (signed)
5332  // saturation
5333  // At this point, PosVal is guaranteed to be positive
5334  uint64_t K = PosVal;
5335  SDLoc dl(Op);
5336  if (Val1 == ~Val2)
5337  return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5338  DAG.getConstant(llvm::countr_one(K), dl, VT));
5339  if (NegVal == 0)
5340  return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5341  DAG.getConstant(llvm::countr_one(K), dl, VT));
5342 
5343  return SDValue();
5344 }
5345 
5346 // Check if a condition of the type x < k ? k : x can be converted into a
5347 // bit operation instead of conditional moves.
5348 // Currently this is allowed given:
5349 // - The conditions and values match up
5350 // - k is 0 or -1 (all ones)
5351 // This function will not check the last condition, thats up to the caller
5352 // It returns true if the transformation can be made, and in such case
5353 // returns x in V, and k in SatK.
5355  SDValue &SatK)
5356 {
5357  SDValue LHS = Op.getOperand(0);
5358  SDValue RHS = Op.getOperand(1);
5359  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5360  SDValue TrueVal = Op.getOperand(2);
5361  SDValue FalseVal = Op.getOperand(3);
5362 
5363  SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5364  ? &RHS
5365  : nullptr;
5366 
5367  // No constant operation in comparison, early out
5368  if (!K)
5369  return false;
5370 
5371  SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5372  V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5373  SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5374 
5375  // If the constant on left and right side, or variable on left and right,
5376  // does not match, early out
5377  if (*K != KTmp || V != VTmp)
5378  return false;
5379 
5380  if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5381  SatK = *K;
5382  return true;
5383  }
5384 
5385  return false;
5386 }
5387 
5388 bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5389  if (VT == MVT::f32)
5390  return !Subtarget->hasVFP2Base();
5391  if (VT == MVT::f64)
5392  return !Subtarget->hasFP64();
5393  if (VT == MVT::f16)
5394  return !Subtarget->hasFullFP16();
5395  return false;
5396 }
5397 
5398 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5399  EVT VT = Op.getValueType();
5400  SDLoc dl(Op);
5401 
5402  // Try to convert two saturating conditional selects into a single SSAT
5403  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5404  if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5405  return SatValue;
5406 
5407  // Try to convert expressions of the form x < k ? k : x (and similar forms)
5408  // into more efficient bit operations, which is possible when k is 0 or -1
5409  // On ARM and Thumb-2 which have flexible operand 2 this will result in
5410  // single instructions. On Thumb the shift and the bit operation will be two
5411  // instructions.
5412  // Only allow this transformation on full-width (32-bit) operations
5413  SDValue LowerSatConstant;
5414  SDValue SatValue;
5415  if (VT == MVT::i32 &&
5416  isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5417  SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5418  DAG.getConstant(31, dl, VT));
5419  if (isNullConstant(LowerSatConstant)) {
5420  SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5421  DAG.getAllOnesConstant(dl, VT));
5422  return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5423  } else if (isAllOnesConstant(LowerSatConstant))
5424  return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5425  }
5426 
5427  SDValue LHS = Op.getOperand(0);
5428  SDValue RHS = Op.getOperand(1);
5429  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5430  SDValue TrueVal = Op.getOperand(2);
5431  SDValue FalseVal = Op.getOperand(3);
5432  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5433  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5434 
5435  if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5436  LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5437  unsigned TVal = CTVal->getZExtValue();
5438  unsigned FVal = CFVal->getZExtValue();
5439  unsigned Opcode = 0;
5440 
5441  if (TVal == ~FVal) {
5442  Opcode = ARMISD::CSINV;
5443  } else if (TVal == ~FVal + 1) {
5444  Opcode = ARMISD::CSNEG;
5445  } else if (TVal + 1 == FVal) {
5446  Opcode = ARMISD::CSINC;
5447  } else if (TVal == FVal + 1) {
5448  Opcode = ARMISD::CSINC;
5450  std::swap(TVal, FVal);
5451  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5452  }
5453 
5454  if (Opcode) {
5455  // If one of the constants is cheaper than another, materialise the
5456  // cheaper one and let the csel generate the other.
5457  if (Opcode != ARMISD::CSINC &&
5458  HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5460  std::swap(TVal, FVal);
5461  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5462  }
5463 
5464  // Attempt to use ZR checking TVal is 0, possibly inverting the condition
5465  // to get there. CSINC not is invertable like the other two (~(~a) == a,
5466  // -(-a) == a, but (a+1)+1 != a).
5467  if (FVal == 0 && Opcode != ARMISD::CSINC) {
5469  std::swap(TVal, FVal);
5470  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5471  }
5472 
5473  // Drops F's value because we can get it by inverting/negating TVal.
5474  FalseVal = TrueVal;
5475 
5476  SDValue ARMcc;
5477  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5478  EVT VT = TrueVal.getValueType();
5479  return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5480  }
5481  }
5482 
5483  if (isUnsupportedFloatingType(LHS.getValueType())) {
5485  DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5486 
5487  // If softenSetCCOperands only returned one value, we should compare it to
5488  // zero.
5489  if (!RHS.getNode()) {
5490  RHS = DAG.getConstant(0, dl, LHS.getValueType());
5491  CC = ISD::SETNE;
5492  }
5493  }
5494 
5495  if (LHS.getValueType() == MVT::i32) {
5496  // Try to generate VSEL on ARMv8.
5497  // The VSEL instruction can't use all the usual ARM condition
5498  // codes: it only has two bits to select the condition code, so it's
5499  // constrained to use only GE, GT, VS and EQ.
5500  //
5501  // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5502  // swap the operands of the previous compare instruction (effectively
5503  // inverting the compare condition, swapping 'less' and 'greater') and
5504  // sometimes need to swap the operands to the VSEL (which inverts the
5505  // condition in the sense of firing whenever the previous condition didn't)
5506  if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5507  TrueVal.getValueType() == MVT::f32 ||
5508  TrueVal.getValueType() == MVT::f64)) {
5510  if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5511  CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5512  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5514  }
5515  }
5516 
5517  SDValue ARMcc;
5518  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5519  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5520  // Choose GE over PL, which vsel does now support
5521  if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
5522  ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5523  return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5524  }
5525 
5526  ARMCC::CondCodes CondCode, CondCode2;
5527  FPCCToARMCC(CC, CondCode, CondCode2);
5528 
5529  // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5530  // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5531  // must use VSEL (limited condition codes), due to not having conditional f16
5532  // moves.
5533  if (Subtarget->hasFPARMv8Base() &&
5534  !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5535  (TrueVal.getValueType() == MVT::f16 ||
5536  TrueVal.getValueType() == MVT::f32 ||
5537  TrueVal.getValueType() == MVT::f64)) {
5538  bool swpCmpOps = false;
5539  bool swpVselOps = false;
5540  checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5541 
5542  if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5543  CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5544  if (swpCmpOps)
5545  std::swap(LHS, RHS);
5546  if (swpVselOps)
5548  }
5549  }
5550 
5551  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5552  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5553  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5554  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5555  if (CondCode2 != ARMCC::AL) {
5556  SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5557  // FIXME: Needs another CMP because flag can have but one use.
5558  SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5559  Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5560  }
5561  return Result;
5562 }
5563 
5564 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
5565 /// to morph to an integer compare sequence.
5566 static bool canChangeToInt(SDValue Op, bool &SeenZero,
5567  const ARMSubtarget *Subtarget) {
5568  SDNode *N = Op.getNode();
5569  if (!N->hasOneUse())
5570  // Otherwise it requires moving the value from fp to integer registers.
5571  return false;
5572  if (!N->getNumValues())
5573  return false;
5574  EVT VT = Op.getValueType();
5575  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5576  // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5577  // vmrs are very slow, e.g. cortex-a8.
5578  return false;
5579 
5580  if (isFloatingPointZero(Op)) {
5581  SeenZero = true;
5582  return true;
5583  }
5584  return ISD::isNormalLoad(N);
5585 }
5586 
5588  if (isFloatingPointZero(Op))
5589  return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5590 
5591  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5592  return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5593  Ld->getPointerInfo(), Ld->getAlign(),
5594  Ld->getMemOperand()->getFlags());
5595 
5596  llvm_unreachable("Unknown VFP cmp argument!");
5597 }
5598 
5600  SDValue &RetVal1, SDValue &RetVal2) {
5601  SDLoc dl(Op);
5602 
5603  if (isFloatingPointZero(Op)) {
5604  RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5605  RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5606  return;
5607  }
5608 
5609  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5610  SDValue Ptr = Ld->getBasePtr();
5611  RetVal1 =
5612  DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5613  Ld->getAlign(), Ld->getMemOperand()->getFlags());
5614 
5615  EVT PtrType = Ptr.getValueType();
5616  SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5617  PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5618  RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5619  Ld->getPointerInfo().getWithOffset(4),
5620  commonAlignment(Ld->getAlign(), 4),
5621  Ld->getMemOperand()->getFlags());
5622  return;
5623  }
5624 
5625  llvm_unreachable("Unknown VFP cmp argument!");
5626 }
5627 
/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
/// f32 and even f64 comparisons to integer ones.
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
  // BR_CC operands: chain, condition code, LHS, RHS, destination block.
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  // Both operands must be morphable (single-use zero or normal load) and at
  // least one side must be a floating-point zero for this to be worthwhile.
  bool LHSSeenZero = false;
  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
  bool RHSSeenZero = false;
  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
    // If unsafe fp math optimization is enabled and there are no other uses of
    // the CMP operands, and the condition code is EQ or NE, we can optimize it
    // to an integer comparison.
    if (CC == ISD::SETOEQ)
      CC = ISD::SETEQ;
    else if (CC == ISD::SETUNE)
      CC = ISD::SETNE;

    // Clearing the sign bit with this mask makes +0.0 and -0.0 compare equal
    // when the bit patterns are compared as integers.
    SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
    SDValue ARMcc;
    if (LHS.getValueType() == MVT::f32) {
      LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                        bitcastf32Toi32(LHS, DAG), Mask);
      RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                        bitcastf32Toi32(RHS, DAG), Mask);
      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                         Chain, Dest, ARMcc, CCR, Cmp);
    }

    // f64: split each operand into two i32 halves and emit a 64-bit
    // compare-and-branch; only the high halves carry a sign bit to mask.
    SDValue LHS1, LHS2;
    SDValue RHS1, RHS2;
    expandf64Toi32(LHS, DAG, LHS1, LHS2);
    expandf64Toi32(RHS, DAG, RHS1, RHS2);
    LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
    RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
    ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
  }

  // Not profitable/possible; let the generic lowering handle it.
  return SDValue();
}
5680 
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  // BRCOND operands: chain, boolean condition, destination block.
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Dest = Op.getOperand(2);
  SDLoc dl(Op);

  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
  // instruction.
  unsigned Opc = Cond.getOpcode();
  // The *MULO forms are only handled here outside of Thumb1.
  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
                     !Subtarget->isThumb1Only();
  // Result #1 of an overflow-checked op is its overflow flag.
  if (Cond.getResNo() == 1 &&
      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
       Opc == ISD::USUBO || OptimizeMul)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
      return SDValue();

    // The actual operation with overflow check.
    SDValue Value, OverflowCmp;
    SDValue ARMcc;
    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);

    // Reverse the condition code.
        (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
    ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

    // Branch directly on the flags produced by the overflow computation.
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
                       OverflowCmp);
  }

  return SDValue();
}
5717 
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  // BR_CC operands: chain, condition code, LHS, RHS, destination block.
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  // Soften the comparison when the FP type has no hardware support.
  if (isUnsupportedFloatingType(LHS.getValueType())) {
        DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);

    // If softenSetCCOperands only returned one value, we should compare it to
    // zero.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
  // instruction.
  unsigned Opc = LHS.getOpcode();
  // The *MULO forms are only handled here outside of Thumb1.
  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
                     !Subtarget->isThumb1Only();
  // Result #1 of an overflow-checked op is its overflow flag; we only fold
  // equality compares of that flag against the constants 0 and 1.
  if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
       Opc == ISD::USUBO || OptimizeMul) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
      return SDValue();

    // The actual operation with overflow check.
    SDValue Value, OverflowCmp;
    SDValue ARMcc;
    std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);

    // "!= 1" and "== 0" test the opposite of the overflow condition.
    if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
      // Reverse the condition code.
          (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
      ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
    }
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
                       OverflowCmp);
  }

  // Plain integer compare-and-branch.
  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                       Chain, Dest, ARMcc, CCR, Cmp);
  }

  // With unsafe FP math, (in)equality compares may be morphed into integer
  // compares; see OptimizeVFPBrcond.
  if (getTargetMachine().Options.UnsafeFPMath &&
      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
       CC == ISD::SETNE || CC == ISD::SETUNE)) {
    if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
      return Result;
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
  // Some FP condition codes need a second ARM condition; chain a second
  // conditional branch glued to the first compare's flags.
  if (CondCode2 != ARMCC::AL) {
    ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
  }
  return Res;
}
5800 
SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
  // BR_JT operands: chain, jump-table node, index into the table.
  SDValue Chain = Op.getOperand(0);
  SDValue Table = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);
  SDLoc dl(Op);

  EVT PTy = getPointerTy(DAG.getDataLayout());
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
  // Each table entry is 4 bytes wide, so scale the index by 4.
  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
  if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
    // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table
    // which does another jump to the destination. This also makes it easier
    // to translate it to TBB / TBH later (Thumb2 only).
    // FIXME: This might not work if the function is extremely large.
    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, Op.getOperand(2), JTI);
  }
  if (isPositionIndependent() || Subtarget->isROPI()) {
    // PIC/ROPI: table entries hold offsets, so load the entry and add it to
    // the table base to form the destination address.
    Addr =
        DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
    Chain = Addr.getValue(1);
    Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
  } else {
    // Absolute addressing: the loaded table entry is the destination itself.
    Addr =
        DAG.getLoad(PTy, dl, Chain, Addr,
    Chain = Addr.getValue(1);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
  }
}
5836 
5838  EVT VT = Op.getValueType();
5839  SDLoc dl(Op);
5840 
5841  if (Op.getValueType().getVectorElementType() == MVT::i32) {
5842  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5843  return Op;
5844  return DAG.UnrollVectorOp(Op.getNode());
5845  }
5846 
5847  const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5848 
5849  EVT NewTy;
5850  const EVT OpTy = Op.getOperand(0).getValueType();
5851  if (OpTy == MVT::v4f32)
5852  NewTy = MVT::v4i32;
5853  else if (OpTy == MVT::v4f16 && HasFullFP16)
5854  NewTy = MVT::v4i16;
5855  else if (OpTy == MVT::v8f16 && HasFullFP16)
5856  NewTy = MVT::v8i16;
5857  else
5858  llvm_unreachable("Invalid type for custom lowering!");
5859 
5860  if (VT != MVT::v4i16 && VT != MVT::v8i16)
5861  return DAG.UnrollVectorOp(Op.getNode());
5862 
5863  Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5864  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5865 }
5866 
5867 SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5868  EVT VT = Op.getValueType();
5869  if (VT.isVector())
5870  return LowerVectorFP_TO_INT(Op, DAG);
5871 
5872  bool IsStrict = Op->isStrictFPOpcode();
5873  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5874 
5875  if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5876  RTLIB::Libcall LC;
5877  if (Op.getOpcode() == ISD::FP_TO_SINT ||
5878  Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5879  LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5880  Op.getValueType());
5881  else
5882  LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5883  Op.getValueType());
5884  SDLoc Loc(Op);
5885  MakeLibCallOptions CallOptions;
5886  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5887  SDValue Result;
5888  std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5889  CallOptions, Loc, Chain);
5890  return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5891  }
5892 
5893  // FIXME: Remove this when we have strict fp instruction selection patterns
5894  if (IsStrict) {
5895  SDLoc Loc(Op);
5896  SDValue Result =
5897  DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5898  : ISD::FP_TO_UINT,
5899  Loc, Op.getValueType(), SrcVal);
5900  return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5901  }
5902 
5903  return Op;
5904 }
5905 
5907  const ARMSubtarget *Subtarget) {
5908  EVT VT = Op.getValueType();
5909  EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5910  EVT FromVT = Op.getOperand(0).getValueType();
5911 
5912  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5913  return Op;
5914  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5915  Subtarget->hasFP64())
5916  return Op;
5917  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5918  Subtarget->hasFullFP16())
5919  return Op;
5920  if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5921  Subtarget->hasMVEFloatOps())
5922  return Op;
5923  if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5924  Subtarget->hasMVEFloatOps())
5925  return Op;
5926 
5927  if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5928  return SDValue();
5929 
5930  SDLoc DL(Op);
5931  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5932  unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5933  SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5934  DAG.getValueType(VT.getScalarType()));
5935  SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5936  DAG.getConstant((1 << BW) - 1, DL, VT));
5937  if (IsSigned)
5938  Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5939  DAG.getConstant(-(1 << BW), DL, VT));
5940  return Max;
5941 }
5942 
5944  EVT VT = Op.getValueType();
5945  SDLoc dl(Op);
5946 
5947  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5948  if (VT.getVectorElementType() == MVT::f32)
5949  return Op;
5950  return DAG.UnrollVectorOp(Op.getNode());
5951  }
5952 
5953  assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5954  Op.getOperand(0).getValueType() == MVT::v8i16) &&
5955  "Invalid type for custom lowering!");
5956 
5957  const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5958 
5959  EVT DestVecType;
5960  if (VT == MVT::v4f32)
5961  DestVecType = MVT::v4i32;
5962  else if (VT == MVT::v4f16 && HasFullFP16)
5963  DestVecType = MVT::v4i16;
5964  else if (VT == MVT::v8f16 && HasFullFP16)
5965  DestVecType = MVT::v8i16;
5966  else
5967  return DAG.UnrollVectorOp(Op.getNode());
5968 
5969  unsigned CastOpc;
5970  unsigned Opc;
5971  switch (Op.getOpcode()) {
5972  default: llvm_unreachable("Invalid opcode!");
5973  case ISD::SINT_TO_FP:
5974  CastOpc = ISD::SIGN_EXTEND;
5975  Opc = ISD::SINT_TO_FP;
5976  break;
5977  case ISD::UINT_TO_FP:
5978  CastOpc = ISD::ZERO_EXTEND;
5979  Opc = ISD::UINT_TO_FP;
5980  break;
5981  }
5982 
5983  Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5984  return DAG.getNode(Opc, dl, VT, Op);
5985 }
5986 
5987 SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5988  EVT VT = Op.getValueType();
5989  if (VT.isVector())
5990  return LowerVectorINT_TO_FP(Op, DAG);
5991  if (isUnsupportedFloatingType(VT)) {
5992  RTLIB::Libcall LC;
5993  if (Op.getOpcode() == ISD::SINT_TO_FP)
5994  LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5995  Op.getValueType());
5996  else
5997  LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5998  Op.getValueType());
5999  MakeLibCallOptions CallOptions;
6000  return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6001  CallOptions, SDLoc(Op)).first;
6002  }
6003 
6004  return Op;
6005 }
6006 
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
  // Implement fcopysign with a fabs and a conditional fneg.
  SDValue Tmp0 = Op.getOperand(0);  // magnitude source
  SDValue Tmp1 = Op.getOperand(1);  // sign source
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  EVT SrcVT = Tmp1.getValueType();
  // If the magnitude already lives in GPRs, integer bit-twiddling is cheaper
  // than round-tripping through the NEON register file.
  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
    Tmp0.getOpcode() == ARMISD::VMOVDRR;
  bool UseNEON = !InGPR && Subtarget->hasNEON();

  if (UseNEON) {
    // Use VBSL to copy the sign bit.
    // Encoded modified-immediate producing a vector with only sign bits set.
    unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
                          DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
    if (VT == MVT::f64)
      // Shift the mask up so it covers the sign bit of the f64 high word.
      Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
                         DAG.getConstant(32, dl, MVT::i32));
    else /*if (VT == MVT::f32)*/
      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
    // Move the sign source into the same lane position as the mask.
    if (SrcVT == MVT::f32) {
      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
      if (VT == MVT::f64)
        Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
                           DAG.getConstant(32, dl, MVT::i32));
    } else if (VT == MVT::f32)
      Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
                         DAG.getConstant(32, dl, MVT::i32));
    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);

                              dl, MVT::i32);
    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));

    // Bit-select: sign bit from Tmp1, all remaining bits from Tmp0.
    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
    if (VT == MVT::f32) {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
                        DAG.getConstant(0, dl, MVT::i32));
    } else {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
    }

    return Res;
  }

  // Bitcast operand 1 to i32.
  if (SrcVT == MVT::f64)
    // Only the high word of an f64 carries the sign bit.
    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                       Tmp1).getValue(1);
  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);

  // Or in the signbit with integer operations.
  SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);  // sign bit
  SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);  // magnitude bits
  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
  if (VT == MVT::f32) {
    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
  }

  // f64: Or the high part with signbit and then combine two parts.
  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                     Tmp0);
  SDValue Lo = Tmp0.getValue(0);
  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
6088 
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  // Operand 0 is the requested frame depth (0 = current frame).
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    // For outer frames, load the return address saved at frame pointer + 4.
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
6112 
6113 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6114  const ARMBaseRegisterInfo &ARI =
6115  *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6116  MachineFunction &MF = DAG.getMachineFunction();
6117  MachineFrameInfo &MFI = MF.getFrameInfo();
6118  MFI.setFrameAddressIsTaken(true);
6119 
6120  EVT VT = Op.getValueType();
6121  SDLoc dl(Op); // FIXME probably not meaningful
6122  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6123  Register FrameReg = ARI.getFrameRegister(MF);
6124  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6125  while (Depth--)
6126  FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6127  MachinePointerInfo());
6128  return FrameAddr;
6129 }
6130 
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
                                              const MachineFunction &MF) const {
      .Case("sp", ARM::SP)
      .Default(0);
  // Only a recognized name yields a register; anything else is a hard error.
  if (Reg)
    return Reg;
  report_fatal_error(Twine("Invalid register name \""
                           + StringRef(RegName) + "\"."));
}
6143 
6144 // Result is 64 bit value so split into two 32 bit values and return as a
6145 // pair of values.
6147  SelectionDAG &DAG) {
6148  SDLoc DL(N);
6149 
6150  // This function is only supposed to be called for i64 type destination.
6151  assert(N->getValueType(0) == MVT::i64
6152  && "ExpandREAD_REGISTER called for non-i64 type result.");
6153 
6154  SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6156  N->getOperand(0),
6157  N->getOperand(1));
6158 
6159  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6160  Read.getValue(1)));
6161  Results.push_back(Read.getOperand(0));
6162 }
6163 
6164 /// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6165 /// When \p DstVT, the destination type of \p BC, is on the vector
6166 /// register bank and the source of bitcast, \p Op, operates on the same bank,
6167 /// it might be possible to combine them, such that everything stays on the
6168 /// vector register bank.
/// \returns The node that would replace \p BC, if the combine
/// is possible.
6172  SelectionDAG &DAG) {
6173  SDValue Op = BC->getOperand(0);
6174  EVT DstVT = BC->getValueType(0);
6175 
6176  // The only vector instruction that can produce a scalar (remember,
6177  // since the bitcast was about to be turned into VMOVDRR, the source
6178  // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6179  // Moreover, we can do this combine only if there is one use.
6180  // Finally, if the destination type is not a vector, there is not
6181  // much point on forcing everything on the vector bank.
6182  if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6183  !Op.hasOneUse())
6184  return SDValue();
6185 
6186  // If the index is not constant, we will introduce an additional
6187  // multiply that will stick.
6188  // Give up in that case.
6189  ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6190  if (!Index)
6191  return SDValue();
6192  unsigned DstNumElt = DstVT.getVectorNumElements();
6193 
6194  // Compute the new index.
6195  const APInt &APIntIndex = Index->getAPIntValue();
6196  APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6197  NewIndex *= APIntIndex;
6198  // Check if the new constant index fits into i32.
6199  if (NewIndex.getBitWidth() > 32)
6200  return SDValue();
6201 
6202  // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6203  // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6204  SDLoc dl(Op);
6205  SDValue ExtractSrc = Op.getOperand(0);
6206  EVT VecVT = EVT::getVectorVT(
6207  *DAG.getContext(), DstVT.getScalarType(),
6208  ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6209  SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6210  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6211  DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6212 }
6213 
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
                                         const ARMSubtarget *Subtarget) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDLoc dl(N);
  SDValue Op = N->getOperand(0);

  // This function is only supposed to be called for i16 and i64 types, either
  // as the source or destination of the bit convert.
  EVT SrcVT = Op.getValueType();
  EVT DstVT = N->getValueType(0);

  // Integer -> half-precision FP register moves.
  if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
      (DstVT == MVT::f16 || DstVT == MVT::bf16))
    return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),

  // Half-precision FP -> integer register moves (truncated to DstVT).
  if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
      (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
    return DAG.getNode(
        ISD::TRUNCATE, SDLoc(N), DstVT,
        MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));

  if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
    return SDValue();

  // Turn i64->f64 into VMOVDRR.
  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
    // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
    // if we can combine the bitcast with its source.
    if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
      return Val;

                           DAG.getConstant(0, dl, MVT::i32));
                           DAG.getConstant(1, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, DstVT,
                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
  }

  // Turn f64->i64 into VMOVRRD.
  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
    SDValue Cvt;
    if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
        SrcVT.getVectorNumElements() > 1)
      // Big-endian vectors are reversed first so the two i32 halves come out
      // in the order BUILD_PAIR expects.
      Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
                        DAG.getVTList(MVT::i32, MVT::i32),
                        DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
    else
      Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
                        DAG.getVTList(MVT::i32, MVT::i32), Op);
    // Merge the pieces into a single i64 value.
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
  }

  return SDValue();
}
6276 
6277 /// getZeroVector - Returns a vector of specified type with all zero elements.
6278 /// Zero vectors are used to represent vector negation and in those cases
6279 /// will be implemented with the NEON VNEG instruction. However, VNEG does
6280 /// not support i64 elements, so sometimes the zero vectors will need to be
6281 /// explicitly constructed. Regardless, use a canonical VMOV to create the
6282 /// zero vector.
6283 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6284  assert(VT.isVector() && "Expected a vector type");
6285  // The canonical modified immediate encoding of a zero vector is....0!
6286  SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6287  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6288  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6289  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6290 }
6291 
6292 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6293 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
6294 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6295  SelectionDAG &DAG) const {
6296  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6297  EVT VT = Op.getValueType();
6298  unsigned VTBits = VT.getSizeInBits();
6299  SDLoc dl(Op);
6300  SDValue ShOpLo = Op.getOperand(0);
6301  SDValue ShOpHi = Op.getOperand(1);
6302  SDValue ShAmt = Op.getOperand(2);
6303  SDValue ARMcc;
6304  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6305  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6306 
6307  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6308 
6309  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6310  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6311  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6312  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6313  DAG.getConstant(VTBits, dl, MVT::i32));
6314  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6315  SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6316  SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6317  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6318  ISD::SETGE, ARMcc, DAG, dl);
6319  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6320  ARMcc, CCR, CmpLo);
6321 
6322  SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6323  SDValue HiBigShift = Opc == ISD::SRA
6324  ? DAG.getNode(Opc, dl, VT, ShOpHi,
6325  DAG.getConstant(VTBits - 1, dl, VT))
6326  : DAG.getConstant(0, dl, VT);
6327  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6328  ISD::SETGE, ARMcc, DAG, dl);
6329  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6330  ARMcc, CCR, CmpHi);
6331 
6332  SDValue Ops[2] = { Lo, Hi };
6333  return DAG.getMergeValues(Ops, dl);
6334 }
6335 
6336 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6337 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
/// Expansion: for ShAmt < 32 the result is
///   Hi = (Hi << ShAmt) | (Lo >> (32 - ShAmt)), Lo = Lo << ShAmt;
/// for ShAmt >= 32 it is Hi = Lo << (ShAmt - 32), Lo = 0. Both variants are
/// computed unconditionally and selected with ARMISD::CMOV on the sign of
/// (ShAmt - 32).
6338 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6339  SelectionDAG &DAG) const {
6340  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6341  EVT VT = Op.getValueType();
6342  unsigned VTBits = VT.getSizeInBits();
6343  SDLoc dl(Op);
6344  SDValue ShOpLo = Op.getOperand(0);
6345  SDValue ShOpHi = Op.getOperand(1);
6346  SDValue ShAmt = Op.getOperand(2);
  // ARMcc is filled in (by reference) by each getARMCmp call below.
6347  SDValue ARMcc;
6348  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6349 
6350  assert(Op.getOpcode() == ISD::SHL_PARTS);
  // "Small shift" (ShAmt < VTBits) value of the high word:
  // (Hi << ShAmt) | (Lo >> (VTBits - ShAmt)).
6351  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6352  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6353  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6354  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6355  SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6356 
  // "Big shift" (ShAmt >= VTBits) value of the high word: Lo << (ShAmt - 32).
6357  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6358  DAG.getConstant(VTBits, dl, MVT::i32));
6359  SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
  // Select HiBigShift when ExtraShAmt >= 0, i.e. ShAmt >= VTBits.
6360  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6361  ISD::SETGE, ARMcc, DAG, dl);
6362  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6363  ARMcc, CCR, CmpHi);
6364 
  // Low word: Lo << ShAmt for small shifts, 0 once ShAmt >= VTBits.
6365  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6366  ISD::SETGE, ARMcc, DAG, dl);
6367  SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6368  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6369  DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6370 
6371  SDValue Ops[2] = { Lo, Hi };
6372  return DAG.getMergeValues(Ops, dl);
6373 }
6374 
/// Lower ISD::GET_ROUNDING by reading FPSCR (via the arm_get_fpscr
/// intrinsic) and translating the hardware rounding-mode field into the
/// FLT_ROUNDS encoding. Returns {rounding mode, chain}.
6375 SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6376  SelectionDAG &DAG) const {
6377  // The rounding mode is in bits 23:22 of the FPSCR.
6378  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6379  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
6380  // so that the shift + and get folded into a bitfield extract.
6381  SDLoc dl(Op);
6382  SDValue Chain = Op.getOperand(0);
  // Operands for the INTRINSIC_W_CHAIN node that reads FPSCR.
6383  SDValue Ops[] = {Chain,
6384  DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6385 
  // NOTE(review): the INTRINSIC_W_CHAIN getNode call on the next line appears
  // truncated in this copy of the file — confirm against the upstream source.
6386  SDValue FPSCR =
6388  Chain = FPSCR.getValue(1);
  // ((FPSCR + (1 << 22)) >> 22) & 3 implements the 0->1,1->2,2->3,3->0 map.
6389  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6390  DAG.getConstant(1U << 22, dl, MVT::i32));
6391  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6392  DAG.getConstant(22, dl, MVT::i32));
6393  SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6394  DAG.getConstant(3, dl, MVT::i32));
6395  return DAG.getMergeValues({And, Chain}, dl);
6396 }
6397 
/// Lower ISD::SET_ROUNDING: translate the llvm.set.rounding argument into the
/// ARM FPSCR encoding and splice it into bits 23:22 of FPSCR via a
/// read-modify-write (arm_get_fpscr / arm_set_fpscr). Returns the new chain.
6398 SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6399  SelectionDAG &DAG) const {
6400  SDLoc DL(Op);
6401  SDValue Chain = Op->getOperand(0);
6402  SDValue RMValue = Op->getOperand(1);
6403 
6404  // The rounding mode is in bits 23:22 of the FPSCR.
6405  // The llvm.set.rounding argument value to ARM rounding mode value mapping
6406  // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6407  // ((arg - 1) & 3) << 22).
6408  //
6409  // It is expected that the argument of llvm.set.rounding is within the
6410  // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is
6411  // responsibility of the code generated llvm.set.rounding to ensure this
6412  // condition.
6413 
6414  // Calculate new value of FPSCR[23:22].
6415  RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6416  DAG.getConstant(1, DL, MVT::i32));
6417  RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6418  DAG.getConstant(0x3, DL, MVT::i32));
  // NOTE(review): the shift-amount operand of this SHL (presumably
  // ARM::RoundingBitsPos, i.e. 22) is truncated in this copy of the file.
6419  RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6421 
6422  // Get current value of FPSCR.
6423  SDValue Ops[] = {Chain,
6424  DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
  // NOTE(review): the INTRINSIC_W_CHAIN getNode call on the next line appears
  // truncated in this copy of the file — confirm against the upstream source.
6425  SDValue FPSCR =
6427  Chain = FPSCR.getValue(1);
6428  FPSCR = FPSCR.getValue(0);
6429 
6430  // Put new rounding mode into FPSCR[23:22].
6431  const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6432  FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6433  DAG.getConstant(RMMask, DL, MVT::i32));
6434  FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
  // Write the updated value back with the arm_set_fpscr intrinsic.
6435  SDValue Ops2[] = {
6436  Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6437  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6438 }
6439 
// Lower CTTZ / CTTZ_ZERO_UNDEF. Vector NEON types use the identity
// cttz(x) = ctpop(lsb(x) - 1) (or width-1 - ctlz(lsb) for i16/i32 zero-undef);
// scalars are lowered to bit-reverse followed by count-leading-zeros.
// NOTE(review): the function's signature line (taking SDNode *N and
// SelectionDAG &DAG) is truncated in this copy of the file.
6441  const ARMSubtarget *ST) {
6442  SDLoc dl(N);
6443  EVT VT = N->getValueType(0);
6444  if (VT.isVector() && ST->hasNEON()) {
6445 
6446  // Compute the least significant set bit: LSB = X & -X
6447  SDValue X = N->getOperand(0);
6448  SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6449  SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6450 
6451  EVT ElemTy = VT.getVectorElementType();
6452 
6453  if (ElemTy == MVT::i8) {
6454  // Compute with: cttz(x) = ctpop(lsb - 1)
6455  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6456  DAG.getTargetConstant(1, dl, ElemTy));
6457  SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6458  return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6459  }
6460 
6461  if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6462  (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6463  // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6464  unsigned NumBits = ElemTy.getSizeInBits();
6465  SDValue WidthMinus1 =
6466  DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6467  DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6468  SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6469  return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6470  }
6471 
6472  // Compute with: cttz(x) = ctpop(lsb - 1)
6473 
6474  // Compute LSB - 1.
6475  SDValue Bits;
6476  if (ElemTy == MVT::i64) {
6477  // Load constant 0xffff'ffff'ffff'ffff to register.
  // 0x1eff is the encoded VMOV modified-immediate for an all-ones i64
  // splat; LSB + (-1) gives LSB - 1 without an i64 subtract.
6478  SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6479  DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6480  Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6481  } else {
6482  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6483  DAG.getTargetConstant(1, dl, ElemTy));
6484  Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6485  }
6486  return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6487  }
6488 
  // Scalar path requires RBIT/CLZ, available from ARMv6T2 on.
6489  if (!ST->hasV6T2Ops())
6490  return SDValue();
6491 
6492  SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6493  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6494 }
6495 
// Lower vector CTPOP for NEON: popcount the value as bytes (v8i8/v16i8), then
// repeatedly widen with unsigned pairwise-add-long (vpaddlu) until the element
// width matches the requested result type.
// NOTE(review): the function's signature line (taking SDNode *N and
// SelectionDAG &DAG) is truncated in this copy of the file.
6497  const ARMSubtarget *ST) {
6498  EVT VT = N->getValueType(0);
6499  SDLoc DL(N);
6500 
6501  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6502  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6503  VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6504  "Unexpected type for custom ctpop lowering");
6505 
6506  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6507  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6508  SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6509  Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6510 
6511  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6512  unsigned EltSize = 8;
6513  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6514  while (EltSize != VT.getScalarSizeInBits()) {
  // NOTE(review): the declaration of the Ops vector (a SmallVector<SDValue>)
  // is truncated in this copy of the file.
6516  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6517  TLI.getPointerTy(DAG.getDataLayout())));
6518  Ops.push_back(Res);
6519 
  // Each vpaddlu halves the element count and doubles the element width.
6520  EltSize *= 2;
6521  NumElts /= 2;
6522  MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6523  Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6524  }
6525 
6526  return Res;
6527 }
6528 
6529 /// Getvshiftimm - Check if this is a valid build_vector for the immediate
6530 /// operand of a vector shift operation, where all the elements of the
6531 /// build_vector must have the same constant integer value.
6532 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6533  // Ignore bit_converts.
6534  while (Op.getOpcode() == ISD::BITCAST)
6535  Op = Op.getOperand(0);
6536  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6537  APInt SplatBits, SplatUndef;
6538  unsigned SplatBitSize;
6539  bool HasAnyUndefs;
6540  if (!BVN ||
6541  !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6542  ElementBits) ||
6543  SplatBitSize > ElementBits)
6544  return false;
6545  Cnt = SplatBits.getSExtValue();
6546  return true;
6547 }
6548 
6549 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
6550 /// operand of a vector shift left operation. That value must be in the range:
6551 /// 0 <= Value < ElementBits for a left shift; or
6552 /// 0 <= Value <= ElementBits for a long left shift.
6553 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6554  assert(VT.isVector() && "vector shift count is not a vector type");
6555  int64_t ElementBits = VT.getScalarSizeInBits();
6556  if (!getVShiftImm(Op, ElementBits, Cnt))
6557  return false;
6558  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6559 }
6560 
6561 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
6562 /// operand of a vector shift right operation. For a shift opcode, the value
6563 /// is positive, but for an intrinsic the value count must be negative. The
6564 /// absolute value must be in the range:
6565 /// 1 <= |Value| <= ElementBits for a right shift; or
6566 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6567 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6568  int64_t &Cnt) {
6569  assert(VT.isVector() && "vector shift count is not a vector type");
6570  int64_t ElementBits = VT.getScalarSizeInBits();
6571  if (!getVShiftImm(Op, ElementBits, Cnt))
6572  return false;
6573  if (!isIntrinsic)
6574  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6575  if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6576  Cnt = -Cnt;
6577  return true;
6578  }
6579  return false;
6580 }
6581 
// Lower vector SHL/SRA/SRL. Immediate shifts become VSHLIMM/VSHRsIMM/VSHRuIMM;
// register shifts become VSHLs/VSHLu (right shifts via a negated amount).
// NOTE(review): the function's signature line (taking SDNode *N and
// SelectionDAG &DAG) is truncated in this copy of the file.
6583  const ARMSubtarget *ST) {
6584  EVT VT = N->getValueType(0);
6585  SDLoc dl(N);
6586  int64_t Cnt;
6587 
  // Scalar shifts are handled elsewhere.
6588  if (!VT.isVector())
6589  return SDValue();
6590 
6591  // We essentially have two forms here. Shift by an immediate and shift by a
6592  // vector register (there are also shift by a gpr, but that is just handled
6593  // with a tablegen pattern). We cannot easily match shift by an immediate in
6594  // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6595  // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6596  // signed or unsigned, and a negative shift indicates a shift right).
6597  if (N->getOpcode() == ISD::SHL) {
6598  if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6599  return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6600  DAG.getConstant(Cnt, dl, MVT::i32));
6601  return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6602  N->getOperand(1));
6603  }
6604 
6605  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6606  "unexpected vector shift opcode");
6607 
  // Right shift by immediate: pick the signed/unsigned immediate-form node.
6608  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6609  unsigned VShiftOpc =
6610  (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6611  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6612  DAG.getConstant(Cnt, dl, MVT::i32));
6613  }
6614 
6615  // Other right shifts we don't have operations for (we use a shift left by a
6616  // negative number).
6617  EVT ShiftVT = N->getOperand(1).getValueType();
6618  SDValue NegatedCount = DAG.getNode(
6619  ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6620  unsigned VShiftOpc =
6621  (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6622  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6623 }
6624 
// Expand a 64-bit scalar shift. With MVE, small shifts map onto the
// LSLL/LSRL/ASRL long-shift instructions; otherwise only shift-by-one is
// handled here, using SRL_FLAG/SRA_FLAG plus RRX to rotate through carry.
// NOTE(review): the function's signature line (taking SDNode *N and
// SelectionDAG &DAG) is truncated in this copy of the file.
6626  const ARMSubtarget *ST) {
6627  EVT VT = N->getValueType(0);
6628  SDLoc dl(N);
6629 
6630  // We can get here for a node like i32 = ISD::SHL i32, i64
6631  if (VT != MVT::i64)
6632  return SDValue();
6633 
6634  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6635  N->getOpcode() == ISD::SHL) &&
6636  "Unknown shift to lower!");
6637 
6638  unsigned ShOpc = N->getOpcode();
6639  if (ST->hasMVEIntegerOps()) {
6640  SDValue ShAmt = N->getOperand(1);
6641  unsigned ShPartsOpc = ARMISD::LSLL;
6642  ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6643 
6644  // If the shift amount is greater than 32 or has a greater bitwidth than 64
6645  // then do the default optimisation
6646  if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
6647  (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
6648  return SDValue();
6649 
6650  // Extract the lower 32 bits of the shift amount if it's not an i32
6651  if (ShAmt->getValueType(0) != MVT::i32)
6652  ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6653 
6654  if (ShOpc == ISD::SRL) {
6655  if (!Con)
6656  // There is no t2LSRLr instruction so negate and perform an lsll if the
6657  // shift amount is in a register, emulating a right shift.
6658  ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6659  DAG.getConstant(0, dl, MVT::i32), ShAmt);
6660  else
6661  // Else generate an lsrl on the immediate shift amount
6662  ShPartsOpc = ARMISD::LSRL;
6663  } else if (ShOpc == ISD::SRA)
6664  ShPartsOpc = ARMISD::ASRL;
6665 
6666  // Lower 32 bits of the destination/source
6667  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6668  DAG.getConstant(0, dl, MVT::i32));
6669  // Upper 32 bits of the destination/source
6670  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6671  DAG.getConstant(1, dl, MVT::i32));
6672 
6673  // Generate the shift operation as computed above
6674  Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6675  ShAmt);
6676  // The upper 32 bits come from the second return value of lsll
6677  Hi = SDValue(Lo.getNode(), 1);
6678  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6679  }
6680 
6681  // We only lower SRA, SRL of 1 here, all others use generic lowering.
6682  if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6683  return SDValue();
6684 
6685  // If we are in thumb mode, we don't have RRX.
6686  if (ST->isThumb1Only())
6687  return SDValue();
6688 
6689  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6690  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6691  DAG.getConstant(0, dl, MVT::i32));
6692  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6693  DAG.getConstant(1, dl, MVT::i32));
6694 
6695  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
6696  // captures the result into a carry flag.
6697  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
6698  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6699 
6700  // The low part is an ARMISD::RRX operand, which shifts the carry in.
6701  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6702 
6703  // Merge the pieces into a single i64 value.
6704  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6705 }
6706 
// Lower a vector SETCC to ARMISD::VCMP/VCMPZ (or VTST / the 64-bit equality
// expansion). The condition code is canonicalized into an ARMCC predicate
// plus optional operand swap and/or result inversion.
// NOTE(review): the function's signature line (taking SDValue Op and
// SelectionDAG &DAG) is truncated in this copy of the file.
6708  const ARMSubtarget *ST) {
6709  bool Invert = false;
6710  bool Swap = false;
6711  unsigned Opc = ARMCC::AL;
6712 
6713  SDValue Op0 = Op.getOperand(0);
6714  SDValue Op1 = Op.getOperand(1);
6715  SDValue CC = Op.getOperand(2);
6716  EVT VT = Op.getValueType();
6717  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6718  SDLoc dl(Op);
6719 
6720  EVT CmpVT;
  // NOTE(review): the NEON-path assignment to CmpVT on the next line is
  // truncated in this copy of the file.
6721  if (ST->hasNEON())
6723  else {
6724  assert(ST->hasMVEIntegerOps() &&
6725  "No hardware support for integer vector comparison!");
6726 
6727  if (Op.getValueType().getVectorElementType() != MVT::i1)
6728  return SDValue();
6729 
6730  // Make sure we expand floating point setcc to scalar if we do not have
6731  // mve.fp, so that we can handle them from there.
6732  if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6733  return SDValue();
6734 
6735  CmpVT = VT;
6736  }
6737 
6738  if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6739  (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6740  // Special-case integer 64-bit equality comparisons. They aren't legal,
6741  // but they can be lowered with a few vector instructions.
  // Compare as i32 halves, then AND each lane with its VREV64-swapped
  // neighbour so a lane is all-ones only if both halves matched.
6742  unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6743  EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6744  SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6745  SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6746  SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6747  DAG.getCondCode(ISD::SETEQ));
6748  SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6749  SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6750  Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6751  if (SetCCOpcode == ISD::SETNE)
6752  Merged = DAG.getNOT(dl, Merged, CmpVT);
6753  Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6754  return Merged;
6755  }
6756 
6757  if (CmpVT.getVectorElementType() == MVT::i64)
6758  // 64-bit comparisons are not legal in general.
6759  return SDValue();
6760 
6761  if (Op1.getValueType().isFloatingPoint()) {
  // Map the FP condition onto {EQ,GT,GE} with optional Swap/Invert; the
  // fallthroughs are deliberate and encode that mapping.
6762  switch (SetCCOpcode) {
6763  default: llvm_unreachable("Illegal FP comparison");
6764  case ISD::SETUNE:
6765  case ISD::SETNE:
6766  if (ST->hasMVEFloatOps()) {
6767  Opc = ARMCC::NE; break;
6768  } else {
6769  Invert = true; [[fallthrough]];
6770  }
6771  case ISD::SETOEQ:
6772  case ISD::SETEQ: Opc = ARMCC::EQ; break;
6773  case ISD::SETOLT:
6774  case ISD::SETLT: Swap = true; [[fallthrough]];
6775  case ISD::SETOGT:
6776  case ISD::SETGT: Opc = ARMCC::GT; break;
6777  case ISD::SETOLE:
6778  case ISD::SETLE: Swap = true; [[fallthrough]];
6779  case ISD::SETOGE:
6780  case ISD::SETGE: Opc = ARMCC::GE; break;
6781  case ISD::SETUGE: Swap = true; [[fallthrough]];
6782  case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6783  case ISD::SETUGT: Swap = true; [[fallthrough]];
6784  case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6785  case ISD::SETUEQ: Invert = true; [[fallthrough]];
6786  case ISD::SETONE: {
6787  // Expand this to (OLT | OGT).
6788  SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6789  DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6790  SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6791  DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6792  SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6793  if (Invert)
6794  Result = DAG.getNOT(dl, Result, VT);
6795  return Result;
6796  }
6797  case ISD::SETUO: Invert = true; [[fallthrough]];
6798  case ISD::SETO: {
6799  // Expand this to (OLT | OGE).
6800  SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6801  DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6802  SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6803  DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6804  SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6805  if (Invert)
6806  Result = DAG.getNOT(dl, Result, VT);
6807  return Result;
6808  }
6809  }
6810  } else {
6811  // Integer comparisons.
6812  switch (SetCCOpcode) {
6813  default: llvm_unreachable("Illegal integer comparison");
6814  case ISD::SETNE:
6815  if (ST->hasMVEIntegerOps()) {
6816  Opc = ARMCC::NE; break;
6817  } else {
6818  Invert = true; [[fallthrough]];
6819  }
6820  case ISD::SETEQ: Opc = ARMCC::EQ; break;
6821  case ISD::SETLT: Swap = true; [[fallthrough]];
6822  case ISD::SETGT: Opc = ARMCC::GT; break;
6823  case ISD::SETLE: Swap = true; [[fallthrough]];
6824  case ISD::SETGE: Opc = ARMCC::GE; break;
6825  case ISD::SETULT: Swap = true; [[fallthrough]];
6826  case ISD::SETUGT: Opc = ARMCC::HI; break;
6827  case ISD::SETULE: Swap = true; [[fallthrough]];
6828  case ISD::SETUGE: Opc = ARMCC::HS; break;
6829  }
6830 
6831  // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6832  if (ST->hasNEON() && Opc == ARMCC::EQ) {
6833  SDValue AndOp;
  // NOTE(review): the condition testing Op1 for an all-zeros build_vector
  // (the if guarding "AndOp = Op0") is truncated in this copy of the file.
6835  AndOp = Op0;
6836  else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6837  AndOp = Op1;
6838 
6839  // Ignore bitconvert.
6840  if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6841  AndOp = AndOp.getOperand(0);
6842 
6843  if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6844  Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6845  Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6846  SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6847  if (!Invert)
6848  Result = DAG.getNOT(dl, Result, VT);
6849  return Result;
6850  }
6851  }
6852  }
6853 
6854  if (Swap)
6855  std::swap(Op0, Op1);
6856 
6857  // If one of the operands is a constant vector zero, attempt to fold the
6858  // comparison to a specialized compare-against-zero form.
6859  if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6860  (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6861  Opc == ARMCC::NE)) {
6862  if (Opc == ARMCC::GE)
6863  Opc = ARMCC::LE;
6864  else if (Opc == ARMCC::GT)
6865  Opc = ARMCC::LT;
6866  std::swap(Op0, Op1);
6867  }
6868 
6869  SDValue Result;
6870  if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6871  (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6872  Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6873  Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6874  DAG.getConstant(Opc, dl, MVT::i32));
6875  else
6876  Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6877  DAG.getConstant(Opc, dl, MVT::i32));
6878 
6879  Result = DAG.getSExtOrTrunc(Result, dl, VT);
6880 
6881  if (Invert)
6882  Result = DAG.getNOT(dl, Result, VT);
6883 
6884  return Result;
6885 }
6886 
// Lower ISD::SETCCCARRY: compare (LHS - RHS - borrow) using ARMISD::SUBE and
// materialize the condition as a 0/1 value via CMOV on the resulting flags.
// NOTE(review): the function's signature line (presumably a static function
// taking SDValue Op and SelectionDAG &DAG) is truncated in this copy.
6888  SDValue LHS = Op.getOperand(0);
6889  SDValue RHS = Op.getOperand(1);
6890  SDValue Carry = Op.getOperand(2);
6891  SDValue Cond = Op.getOperand(3);
6892  SDLoc DL(Op);
6893 
6894  assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6895 
6896  // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
6897  // have to invert the carry first.
6898  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6899  DAG.getConstant(1, DL, MVT::i32), Carry);
6900  // This converts the boolean value carry into the carry flag.
6901  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6902 
6903  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6904  SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6905 
  // CMOV selects 1 (TVal) when the ARM condition holds, else 0 (FVal).
6906  SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6907  SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6908  SDValue ARMcc = DAG.getConstant(
6909  IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6910  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  // Copy the SUBE flag result into CPSR so the CMOV can read it.
6911  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6912  Cmp.getValue(1), SDValue());
6913  return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6914  CCR, Chain.getValue(1));
6915 }
6916 
6917 /// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6918 /// valid vector constant for a NEON or MVE instruction with a "modified
6919 /// immediate" operand (e.g., VMOV). If so, return the encoded value.
/// On success the chosen vector type is returned through \p VT and the
/// result is the target constant produced by ARM_AM::createVMOVModImm
/// (combined OpCmode and Imm fields); on failure an empty SDValue.
6920 static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6921  unsigned SplatBitSize, SelectionDAG &DAG,
6922  const SDLoc &dl, EVT &VT, EVT VectorVT,
6923  VMOVModImmType type) {
6924  unsigned OpCmode, Imm;
6925  bool is128Bits = VectorVT.is128BitVector();
6926 
6927  // SplatBitSize is set to the smallest size that splats the vector, so a
6928  // zero vector will always have SplatBitSize == 8. However, NEON modified
6929  // immediate instructions others than VMOV do not support the 8-bit encoding
6930  // of a zero vector, and the default encoding of zero is supposed to be the
6931  // 32-bit version.
6932  if (SplatBits == 0)
6933  SplatBitSize = 32;
6934 
6935  switch (SplatBitSize) {
6936  case 8:
6937  if (type != VMOVModImm)
6938  return SDValue();
6939  // Any 1-byte value is OK. Op=0, Cmode=1110.
6940  assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6941  OpCmode = 0xe;
6942  Imm = SplatBits;
6943  VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6944  break;
6945 
6946  case 16:
6947  // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6948  VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6949  if ((SplatBits & ~0xff) == 0) {
6950  // Value = 0x00nn: Op=x, Cmode=100x.
6951  OpCmode = 0x8;
6952  Imm = SplatBits;
6953  break;
6954  }
6955  if ((SplatBits & ~0xff00) == 0) {
6956  // Value = 0xnn00: Op=x, Cmode=101x.
6957  OpCmode = 0xa;
6958  Imm = SplatBits >> 8;
6959  break;
6960  }
6961  return SDValue();
6962 
6963  case 32:
6964  // NEON's 32-bit VMOV supports splat values where:
6965  // * only one byte is nonzero, or
6966  // * the least significant byte is 0xff and the second byte is nonzero, or
6967  // * the least significant 2 bytes are 0xff and the third is nonzero.
6968  VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6969  if ((SplatBits & ~0xff) == 0) {
6970  // Value = 0x000000nn: Op=x, Cmode=000x.
6971  OpCmode = 0;
6972  Imm = SplatBits;
6973  break;
6974  }
6975  if ((SplatBits & ~0xff00) == 0) {
6976  // Value = 0x0000nn00: Op=x, Cmode=001x.
6977  OpCmode = 0x2;
6978  Imm = SplatBits >> 8;
6979  break;
6980  }
6981  if ((SplatBits & ~0xff0000) == 0) {
6982  // Value = 0x00nn0000: Op=x, Cmode=010x.
6983  OpCmode = 0x4;
6984  Imm = SplatBits >> 16;
6985  break;
6986  }
6987  if ((SplatBits & ~0xff000000) == 0) {
6988  // Value = 0xnn000000: Op=x, Cmode=011x.
6989  OpCmode = 0x6;
6990  Imm = SplatBits >> 24;
6991  break;
6992  }
6993 
6994  // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
6995  if (type == OtherModImm) return SDValue();
6996 
  // "nnff" forms: undef bits may be treated as set to match the pattern.
6997  if ((SplatBits & ~0xffff) == 0 &&
6998  ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6999  // Value = 0x0000nnff: Op=x, Cmode=1100.
7000  OpCmode = 0xc;
7001  Imm = SplatBits >> 8;
7002  break;
7003  }
7004 
7005  // cmode == 0b1101 is not supported for MVE VMVN
7006  if (type == MVEVMVNModImm)
7007  return SDValue();
7008 
7009  if ((SplatBits & ~0xffffff) == 0 &&
7010  ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7011  // Value = 0x00nnffff: Op=x, Cmode=1101.
7012  OpCmode = 0xd;
7013  Imm = SplatBits >> 16;
7014  break;
7015  }
7016 
7017  // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7018  // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7019  // VMOV.I32. A (very) minor optimization would be to replicate the value
7020  // and fall through here to test for a valid 64-bit splat. But, then the
7021  // caller would also need to check and handle the change in size.
7022  return SDValue();
7023 
7024  case 64: {
7025  if (type != VMOVModImm)
7026  return SDValue();
7027  // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
  // Build an 8-bit mask with one bit per byte: bit set <=> byte is 0xff.
7028  uint64_t BitMask = 0xff;
7029  unsigned ImmMask = 1;
7030  Imm = 0;
7031  for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7032  if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7033  Imm |= ImmMask;
7034  } else if ((SplatBits & BitMask) != 0) {
  // A byte that is partially set can't be encoded.
7035  return SDValue();
7036  }
7037  BitMask <<= 8;
7038  ImmMask <<= 1;
7039  }
7040 
7041  if (DAG.getDataLayout().isBigEndian()) {
7042  // Reverse the order of elements within the vector.
7043  unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7044  unsigned Mask = (1 << BytesPerElem) - 1;
7045  unsigned NumElems = 8 / BytesPerElem;
7046  unsigned NewImm = 0;
7047  for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7048  unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7049  NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7050  }
7051  Imm = NewImm;
7052  }
7053 
7054  // Op=1, Cmode=1110.
7055  OpCmode = 0x1e;
7056  VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7057  break;
7058  }
7059 
7060  default:
7061  llvm_unreachable("unexpected size for isVMOVModifiedImm");
7062  }
7063 
7064  unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7065  return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7066 }
7067 
7068 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7069  const ARMSubtarget *ST) const {
7070  EVT VT = Op.getValueType();
7071  bool IsDouble = (VT == MVT::f64);
7072  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7073  const APFloat &FPVal = CFP->getValueAPF();
7074 
7075  // Prevent floating-point constants from using literal loads
7076  // when execute-only is enabled.
7077  if (ST->genExecuteOnly()) {
7078  // If we can represent the constant as an immediate, don't lower it
7079  if (isFPImmLegal(FPVal, VT))
7080  return Op;
7081  // Otherwise, construct as integer, and move to float register
7082  APInt INTVal = FPVal.bitcastToAPInt();
7083  SDLoc DL(CFP);
7084  switch (VT.getSimpleVT().SimpleTy) {
7085  default:
7086  llvm_unreachable("Unknown floating point type!");
7087  break;
7088  case MVT::f64: {
7089  SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7090  SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7091  return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7092  }
7093  case MVT::f32:
7094  return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7095  DAG.getConstant(INTVal, DL, MVT::i32));
7096  }
7097  }
7098 
7099  if (!ST->hasVFP3Base())
7100  return SDValue();
7101 
7102  // Use the default (constant pool) lowering for double constants when we have
7103  // an SP-only FPU
7104  if (IsDouble && !Subtarget->hasFP64())
7105  return SDValue();
7106 
7107  // Try splatting with a VMOV.f32...
7108  int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7109 
7110  if (ImmVal != -1) {
7111  if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7112  // We have code in place to select a valid ConstantFP already, no need to
7113  // do any mangling.
7114  return Op;
7115  }
7116 
7117  // It's a float and we are trying to use NEON operations where
7118  // possible. Lower it to a splat followed by an extract.
7119  SDLoc DL(Op);
7120  SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7121  SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7122  NewVal);
7123  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7124  DAG.getConstant(0, DL, MVT::i32));
7125  }
7126 
7127  // The rest of our options are NEON only, make sure that's allowed before
7128  // proceeding..
7129  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7130  return SDValue();
7131 
7132  EVT VMovVT;
7133  uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7134 
7135  // It wouldn't really be worth bothering for doubles except for one very
7136  // important value, which does happen to match: 0.0. So make sure we don't do
7137  // anything stupid.
7138  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7139  return SDValue();
7140 
7141  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7142  SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7143  VMovVT, VT, VMOVModImm);
7144  if (NewVal != SDValue()) {
7145  SDLoc DL(Op);
7146  SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7147  NewVal);
7148  if (IsDouble)
7149  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7150 
7151  // It's a float: cast and extract a vector element.
7152  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7153  VecConstant);
7154  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7155  DAG.getConstant(0, DL, MVT::i32));
7156  }
7157 
7158  // Finally, try a VMVN.i32
7159  NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7160  VT, VMVNModImm);
7161  if (NewVal != SDValue()) {
7162  SDLoc DL(Op);
7163  SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7164 
7165  if (IsDouble)
7166  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7167 
7168  // It's a float: cast and extract a vector element.
7169  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7170  VecConstant);
7171  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7172  DAG.getConstant(0, DL, MVT::i32));
7173  }
7174 
7175  return SDValue();
7176 }
7177 
7178 // check if an VEXT instruction can handle the shuffle mask when the
7179 // vector sources of the shuffle are the same.
7180 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7181  unsigned NumElts = VT.getVectorNumElements();
7182 
7183  // Assume that the first shuffle index is not UNDEF. Fail if it is.
7184  if (M[0] < 0)
7185  return false;
7186 
7187  Imm = M[0];
7188 
7189  // If this is a VEXT shuffle, the immediate value is the index of the first
7190  // element. The other shuffle indices must be the successive elements after
7191  // the first one.
7192  unsigned ExpectedElt = Imm;
7193  for (unsigned i = 1; i < NumElts; ++i) {
7194  // Increment the expected index. If it wraps around, just follow it
7195  // back to index zero and keep going.
7196  ++ExpectedElt;
7197  if (ExpectedElt == NumElts)
7198  ExpectedElt = 0;
7199 
7200  if (M[i] < 0) continue; // ignore UNDEF indices
7201  if (ExpectedElt != static_cast<unsigned>(M[i]))
7202  return false;
7203  }
7204 
7205  return true;
7206 }
7207 
7208 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7209  bool &ReverseVEXT, unsigned &Imm) {
7210  unsigned NumElts = VT.getVectorNumElements();
7211  ReverseVEXT = false;
7212 
7213  // Assume that the first shuffle index is not UNDEF. Fail if it is.
7214  if (M[0] < 0)
7215  return false;
7216 
7217  Imm = M[0];
7218 
7219  // If this is a VEXT shuffle, the immediate value is the index of the first
7220  // element. The other shuffle indices must be the successive elements after
7221  // the first one.
7222  unsigned ExpectedElt = Imm;
7223  for (unsigned i = 1; i < NumElts; ++i) {
7224  // Increment the expected index. If it wraps around, it may still be
7225  // a VEXT but the source vectors must be swapped.
7226  ExpectedElt += 1;
7227  if (ExpectedElt == NumElts * 2) {
7228  ExpectedElt = 0;
7229  ReverseVEXT = true;
7230  }
7231 
7232  if (M[i] < 0) continue; // ignore UNDEF indices
7233  if (ExpectedElt != static_cast<unsigned>(M[i]))
7234  return false;
7235  }
7236 
7237  // Adjust the index value if the source operands will be swapped.
7238  if (ReverseVEXT)
7239  Imm -= NumElts;
7240 
7241  return true;
7242 }
7243 
7244 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7245  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7246  // range, then 0 is placed into the resulting vector. So pretty much any mask
7247  // of 8 elements can work here.
7248  return VT == MVT::v8i8 && M.size() == 8;
7249 }
7250 
7251 static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7252  unsigned Index) {
7253  if (Mask.size() == Elements * 2)
7254  return Index / Elements;
7255  return Mask[Index] == 0 ? 0 : 1;
7256 }
7257 
7258 // Checks whether the shuffle mask represents a vector transpose (VTRN) by
7259 // checking that pairs of elements in the shuffle mask represent the same index
7260 // in each vector, incrementing the expected index by 2 at each step.
7261 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7262 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7263 // v2={e,f,g,h}
7264 // WhichResult gives the offset for each element in the mask based on which
7265 // of the two results it belongs to.
7266 //
7267 // The transpose can be represented either as:
7268 // result1 = shufflevector v1, v2, result1_shuffle_mask
7269 // result2 = shufflevector v1, v2, result2_shuffle_mask
7270 // where v1/v2 and the shuffle masks have the same number of elements
7271 // (here WhichResult (see below) indicates which result is being checked)
7272 //
7273 // or as:
7274 // results = shufflevector v1, v2, shuffle_mask
7275 // where both results are returned in one vector and the shuffle mask has twice
7276 // as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
7277 // want to check the low half and high half of the shuffle mask as if it were
7278 // the other case
7279 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7280  unsigned EltSz = VT.getScalarSizeInBits();
7281  if (EltSz == 64)
7282  return false;
7283 
7284  unsigned NumElts = VT.getVectorNumElements();
7285  if (M.size() != NumElts && M.size() != NumElts*2)
7286  return false;
7287 
7288  // If the mask is twice as long as the input vector then we need to check the
7289  // upper and lower parts of the mask with a matching value for WhichResult
7290  // FIXME: A mask with only even values will be rejected in case the first
7291  // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7292  // M[0] is used to determine WhichResult
7293  for (unsigned i = 0; i < M.size(); i += NumElts) {
7294  WhichResult = SelectPairHalf(NumElts, M, i);
7295  for (unsigned j = 0; j < NumElts; j += 2) {
7296  if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7297  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7298  return false;
7299  }
7300  }
7301 
7302  if (M.size() == NumElts*2)
7303  WhichResult = 0;
7304 
7305  return true;
7306 }
7307 
7308 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7309 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7310 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7311 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7312  unsigned EltSz = VT.getScalarSizeInBits();
7313  if (EltSz == 64)
7314  return false;
7315 
7316  unsigned NumElts = VT.getVectorNumElements();
7317  if (M.size() != NumElts && M.size() != NumElts*2)
7318  return false;
7319 
7320  for (unsigned i = 0; i < M.size(); i += NumElts) {
7321  WhichResult = SelectPairHalf(NumElts, M, i);
7322  for (unsigned j = 0; j < NumElts; j += 2) {
7323  if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7324  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7325  return false;
7326  }
7327  }
7328 
7329  if (M.size() == NumElts*2)
7330  WhichResult = 0;
7331 
7332  return true;
7333 }
7334 
7335 // Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7336 // that the mask elements are either all even and in steps of size 2 or all odd
7337 // and in steps of size 2.
7338 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7339 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7340 // v2={e,f,g,h}
7341 // Requires similar checks to that of isVTRNMask with
7342 // respect the how results are returned.
7343 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7344  unsigned EltSz = VT.getScalarSizeInBits();
7345  if (EltSz == 64)
7346  return false;
7347 
7348  unsigned NumElts = VT.getVectorNumElements();
7349  if (M.size() != NumElts && M.size() != NumElts*2)
7350  return false;
7351 
7352  for (unsigned i = 0; i < M.size(); i += NumElts) {
7353  WhichResult = SelectPairHalf(NumElts, M, i);
7354  for (unsigned j = 0; j < NumElts; ++j) {
7355  if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7356  return false;
7357  }
7358  }
7359 
7360  if (M.size() == NumElts*2)
7361  WhichResult = 0;
7362 
7363  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7364  if (VT.is64BitVector() && EltSz == 32)
7365  return false;
7366 
7367  return true;
7368 }
7369 
7370 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7371 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7372 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
7373 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7374  unsigned EltSz = VT.getScalarSizeInBits();
7375  if (EltSz == 64)
7376  return false;
7377 
7378  unsigned NumElts = VT.getVectorNumElements();
7379  if (M.size() != NumElts && M.size() != NumElts*2)
7380  return false;
7381 
7382  unsigned Half = NumElts / 2;
7383  for (unsigned i = 0; i < M.size(); i += NumElts) {
7384  WhichResult = SelectPairHalf(NumElts, M, i);
7385  for (unsigned j = 0; j < NumElts; j += Half) {
7386  unsigned Idx = WhichResult;
7387  for (unsigned k = 0; k < Half; ++k) {
7388  int MIdx = M[i + j + k];
7389  if (MIdx >= 0 && (unsigned) MIdx != Idx)
7390  return false;
7391  Idx += 2;
7392  }
7393  }
7394  }
7395 
7396  if (M.size() == NumElts*2)
7397  WhichResult = 0;
7398 
7399  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7400  if (VT.is64BitVector() && EltSz == 32)
7401  return false;
7402 
7403  return true;
7404 }
7405 
7406 // Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7407 // that pairs of elements of the shufflemask represent the same index in each
7408 // vector incrementing sequentially through the vectors.
7409 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7410 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7411 // v2={e,f,g,h}
7412 // Requires similar checks to that of isVTRNMask with respect the how results
7413 // are returned.
7414 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7415  unsigned EltSz = VT.getScalarSizeInBits();
7416  if (EltSz == 64)
7417  return false;
7418 
7419  unsigned NumElts = VT.getVectorNumElements();
7420  if (M.size() != NumElts && M.size() != NumElts*2)
7421  return false;
7422 
7423  for (unsigned i = 0; i < M.size(); i += NumElts) {
7424  WhichResult = SelectPairHalf(NumElts, M, i);
7425  unsigned Idx = WhichResult * NumElts / 2;
7426  for (unsigned j = 0; j < NumElts; j += 2) {
7427  if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7428  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7429  return false;
7430  Idx += 1;
7431  }
7432  }
7433 
7434  if (M.size() == NumElts*2)
7435  WhichResult = 0;
7436 
7437  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7438  if (VT.is64BitVector() && EltSz == 32)
7439  return false;
7440 
7441  return true;
7442 }
7443 
7444 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7445 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7446 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7447 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7448  unsigned EltSz = VT.getScalarSizeInBits();
7449  if (EltSz == 64)
7450  return false;
7451 
7452  unsigned NumElts = VT.getVectorNumElements();
7453  if (M.size() != NumElts && M.size() != NumElts*2)
7454  return false;
7455 
7456  for (unsigned i = 0; i < M.size(); i += NumElts) {
7457  WhichResult = SelectPairHalf(NumElts, M, i);
7458  unsigned Idx = WhichResult * NumElts / 2;
7459  for (unsigned j = 0; j < NumElts; j += 2) {
7460  if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7461  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7462  return false;
7463  Idx += 1;
7464  }
7465  }
7466 
7467  if (M.size() == NumElts*2)
7468  WhichResult = 0;
7469 
7470  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7471  if (VT.is64BitVector() && EltSz == 32)
7472  return false;
7473 
7474  return true;
7475 }
7476 
7477 /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7478 /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7479 static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7480  unsigned &WhichResult,
7481  bool &isV_UNDEF) {
7482  isV_UNDEF = false;
7483  if (isVTRNMask(ShuffleMask, VT, WhichResult))
7484  return ARMISD::VTRN;
7485  if (isVUZPMask(ShuffleMask, VT, WhichResult))
7486  return ARMISD::VUZP;
7487  if (isVZIPMask(ShuffleMask, VT, WhichResult))
7488  return ARMISD::VZIP;
7489 
7490  isV_UNDEF = true;
7491  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7492  return ARMISD::VTRN;
7493  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7494  return ARMISD::VUZP;
7495  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7496  return ARMISD::VZIP;
7497 
7498  return 0;
7499 }
7500 
7501 /// \return true if this is a reverse operation on an vector.
7502 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7503  unsigned NumElts = VT.getVectorNumElements();
7504  // Make sure the mask has the right size.
7505  if (NumElts != M.size())
7506  return false;
7507 
7508  // Look for <15, ..., 3, -1, 1, 0>.
7509  for (unsigned i = 0; i != NumElts; ++i)
7510  if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7511  return false;
7512 
7513  return true;
7514 }
7515 
7516 static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7517  unsigned NumElts = VT.getVectorNumElements();
7518  // Make sure the mask has the right size.
7519  if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7520  return false;
7521 
7522  // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7523  // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7524  // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7525  // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7526  // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7527  int Ofs = Top ? 1 : 0;
7528  int Upper = SingleSource ? 0 : NumElts;
7529  for (int i = 0, e = NumElts / 2; i != e; ++i) {
7530  if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7531  return false;
7532  if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7533  return false;
7534  }
7535  return true;
7536 }
7537 
7538 static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7539  unsigned NumElts = VT.getVectorNumElements();
7540  // Make sure the mask has the right size.
7541  if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7542  return false;
7543 
7544  // If Top
7545  // Look for <0, N, 2, N+2, 4, N+4, ..>.
7546  // This inserts Input2 into Input1
7547  // else if not Top
7548  // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7549  // This inserts Input1 into Input2
7550  unsigned Offset = Top ? 0 : 1;
7551  unsigned N = SingleSource ? 0 : NumElts;
7552  for (unsigned i = 0; i < NumElts; i += 2) {
7553  if (M[i] >= 0 && M[i] != (int)i)
7554  return false;
7555  if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7556  return false;
7557  }
7558 
7559  return true;
7560 }
7561 
7562 static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7563  unsigned NumElts = ToVT.getVectorNumElements();
7564  if (NumElts != M.size())
7565  return false;
7566 
7567  // Test if the Trunc can be convertable to a VMOVN with this shuffle. We are
7568  // looking for patterns of:
7569  // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7570  // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7571 
7572  unsigned Off0 = rev ? NumElts / 2 : 0;
7573  unsigned Off1 = rev ? 0 : NumElts / 2;
7574  for (unsigned i = 0; i < NumElts; i += 2) {
7575  if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7576  return false;
7577  if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7578  return false;
7579  }
7580 
7581  return true;
7582 }
7583 
7584 // Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7585 // from a pair of inputs. For example:
7586 // BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7587 // FP_ROUND(EXTRACT_ELT(Y, 0),
7588 // FP_ROUND(EXTRACT_ELT(X, 1),
7589 // FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7591  const ARMSubtarget *ST) {
7592  assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7593  if (!ST->hasMVEFloatOps())
7594  return SDValue();
7595 
7596  SDLoc dl(BV);
7597  EVT VT = BV.getValueType();
7598  if (VT != MVT::v8f16)
7599  return SDValue();
7600 
7601  // We are looking for a buildvector of fptrunc elements, where all the
7602  // elements are interleavingly extracted from two sources. Check the first two
7603  // items are valid enough and extract some info from them (they are checked
7604  // properly in the loop below).
7605  if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7607  BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7608  return SDValue();
7609  if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7611  BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7612  return SDValue();
7613  SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7614  SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7615  if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7616  return SDValue();
7617 
7618  // Check all the values in the BuildVector line up with our expectations.
7619  for (unsigned i = 1; i < 4; i++) {
7620  auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7621  return Trunc.getOpcode() == ISD::FP_ROUND &&
7623  Trunc.getOperand(0).getOperand(0) == Op &&
7624  Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7625  };
7626  if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7627  return SDValue();
7628  if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7629  return SDValue();
7630  }
7631 
7632  SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7633  DAG.getConstant(0, dl, MVT::i32));
7634  return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7635  DAG.getConstant(1, dl, MVT::i32));
7636 }
7637 
7638 // Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7639 // from a single input on alternating lanes. For example:
7640 // BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7641 // FP_ROUND(EXTRACT_ELT(X, 2),
7642 // FP_ROUND(EXTRACT_ELT(X, 4), ...)
7644  const ARMSubtarget *ST) {
7645  assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7646  if (!ST->hasMVEFloatOps())
7647  return SDValue();
7648 
7649  SDLoc dl(BV);
7650  EVT VT = BV.getValueType();
7651  if (VT != MVT::v4f32)
7652  return SDValue();
7653 
7654  // We are looking for a buildvector of fptext elements, where all the
7655  // elements are alternating lanes from a single source. For example <0,2,4,6>
7656  // or <1,3,5,7>. Check the first two items are valid enough and extract some
7657  // info from them (they are checked properly in the loop below).
7658  if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7660  return SDValue();
7661  SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7663  if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7664  return SDValue();
7665 
7666  // Check all the values in the BuildVector line up with our expectations.
7667  for (unsigned i = 1; i < 4; i++) {
7668  auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7669  return Trunc.getOpcode() == ISD::FP_EXTEND &&
7671  Trunc.getOperand(0).getOperand(0) == Op &&
7672  Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7673  };
7674  if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7675  return SDValue();
7676  }
7677 
7678  return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7679  DAG.getConstant(Offset, dl, MVT::i32));
7680 }
7681 
7682 // If N is an integer constant that can be moved into a register in one
7683 // instruction, return an SDValue of such a constant (will become a MOV
7684 // instruction). Otherwise return null.
7686  const ARMSubtarget *ST, const SDLoc &dl) {
7687  uint64_t Val;
7688  if (!isa<ConstantSDNode>(N))
7689  return SDValue();
7690  Val = cast<ConstantSDNode>(N)->getZExtValue();
7691 
7692  if (ST->isThumb1Only()) {
7693  if (Val <= 255 || ~Val <= 255)
7694  return DAG.getConstant(Val, dl, MVT::i32);
7695  } else {
7696  if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7697  return DAG.getConstant(Val, dl, MVT::i32);
7698  }
7699  return SDValue();
7700 }
7701 
7703  const ARMSubtarget *ST) {
7704  SDLoc dl(Op);
7705  EVT VT = Op.getValueType();
7706 
7707  assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7708 
7709  unsigned NumElts = VT.getVectorNumElements();
7710  unsigned BoolMask;
7711  unsigned BitsPerBool;
7712  if (NumElts == 2) {
7713  BitsPerBool = 8;
7714  BoolMask = 0xff;
7715  } else if (NumElts == 4) {
7716  BitsPerBool = 4;
7717  BoolMask = 0xf;
7718  } else if (NumElts == 8) {
7719  BitsPerBool = 2;
7720  BoolMask = 0x3;
7721  } else if (NumElts == 16) {
7722  BitsPerBool = 1;
7723  BoolMask = 0x1;
7724  } else
7725  return SDValue();
7726 
7727  // If this is a single value copied into all lanes (a splat), we can just sign
7728  // extend that single value
7729  SDValue FirstOp = Op.getOperand(0);
7730  if (!isa<ConstantSDNode>(FirstOp) &&
7731  llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7732  return U.get().isUndef() || U.get() == FirstOp;
7733  })) {
7734  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7735  DAG.getValueType(MVT::i1));
7736  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7737  }
7738 
7739  // First create base with bits set where known
7740  unsigned Bits32 = 0;
7741  for (unsigned i = 0; i < NumElts; ++i) {
7742  SDValue V = Op.getOperand(i);
7743  if (!isa<ConstantSDNode>(V) && !V.isUndef())
7744  continue;
7745  bool BitSet = V.isUndef() ? false : cast<ConstantSDNode>(V)->getZExtValue();
7746  if (BitSet)
7747  Bits32 |= BoolMask << (i * BitsPerBool);
7748  }
7749 
7750  // Add in unknown nodes
7752  DAG.getConstant(Bits32, dl, MVT::i32));
7753  for (unsigned i = 0; i < NumElts; ++i) {
7754  SDValue V = Op.getOperand(i);
7755  if (isa<ConstantSDNode>(V) || V.isUndef())
7756  continue;
7757  Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7758  DAG.getConstant(i, dl, MVT::i32));
7759  }
7760 
7761  return Base;
7762 }
7763 
7765  const ARMSubtarget *ST) {
7766  if (!ST->hasMVEIntegerOps())
7767  return SDValue();
7768 
7769  // We are looking for a buildvector where each element is Op[0] + i*N
7770  EVT VT = Op.getValueType();
7771  SDValue Op0 = Op.getOperand(0);
7772  unsigned NumElts = VT.getVectorNumElements();
7773 
7774  // Get the increment value from operand 1
7775  SDValue Op1 = Op.getOperand(1);
7776  if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7777  !isa<ConstantSDNode>(Op1.getOperand(1)))
7778  return SDValue();
7779  unsigned N = Op1.getConstantOperandVal(1);
7780  if (N != 1 && N != 2 && N != 4 && N != 8)
7781  return SDValue();
7782 
7783  // Check that each other operand matches
7784  for (unsigned I = 2; I < NumElts; I++) {
7785  SDValue OpI = Op.getOperand(I);
7786  if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7787  !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7788  OpI.getConstantOperandVal(1) != I * N)
7789  return SDValue();
7790  }
7791 
7792  SDLoc DL(Op);
7793  return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7794  DAG.getConstant(N, DL, MVT::i32));
7795 }
7796 
7797 // Returns true if the operation N can be treated as qr instruction variant at
7798 // operand Op.
7799 static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7800  switch (N->getOpcode()) {
7801  case ISD::ADD:
7802  case ISD::MUL:
7803  case ISD::SADDSAT:
7804  case ISD::UADDSAT:
7805  return true;
7806  case ISD::SUB:
7807  case ISD::SSUBSAT:
7808  case ISD::USUBSAT:
7809  return N->getOperand(1).getNode() == Op;
7811  switch (N->getConstantOperandVal(0)) {
7812  case Intrinsic::arm_mve_add_predicated:
7813  case Intrinsic::arm_mve_mul_predicated:
7814  case Intrinsic::arm_mve_qadd_predicated:
7815  case Intrinsic::arm_mve_vhadd:
7816  case Intrinsic::arm_mve_hadd_predicated:
7817  case Intrinsic::arm_mve_vqdmulh:
7818  case Intrinsic::arm_mve_qdmulh_predicated:
7819  case Intrinsic::arm_mve_vqrdmulh:
7820  case Intrinsic::arm_mve_qrdmulh_predicated:
7821  case Intrinsic::arm_mve_vqdmull:
7822  case Intrinsic::arm_mve_vqdmull_predicated:
7823  return true;
7824  case Intrinsic::arm_mve_sub_predicated:
7825  case Intrinsic::arm_mve_qsub_predicated:
7826  case Intrinsic::arm_mve_vhsub:
7827  case Intrinsic::arm_mve_hsub_predicated:
7828  return N->getOperand(2).getNode() == Op;
7829  default:
7830  return false;
7831  }
7832  default:
7833  return false;
7834  }
7835 }
7836 
7837 // If this is a case we can't handle, return null and let the default
7838 // expansion code take care of it.
7839 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7840  const ARMSubtarget *ST) const {
7841  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7842  SDLoc dl(Op);
7843  EVT VT = Op.getValueType();
7844 
7845  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7846  return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7847 
7848  if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7849  return R;
7850 
7851  APInt SplatBits, SplatUndef;
7852  unsigned SplatBitSize;
7853  bool HasAnyUndefs;
7854  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7855  if (SplatUndef.isAllOnes())
7856  return DAG.getUNDEF(VT);
7857 
7858  // If all the users of this constant splat are qr instruction variants,
7859  // generate a vdup of the constant.
7860  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7861  (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7862  all_of(BVN->uses(),
7863  [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7864  EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7865  : SplatBitSize == 16 ? MVT::v8i16
7866  : MVT::v16i8;
7867  SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7868  SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7869  return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7870  }
7871 
7872  if ((ST->hasNEON() && SplatBitSize <= 64) ||
7873  (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7874  // Check if an immediate VMOV works.
7875  EVT VmovVT;
7876  SDValue Val =
7877  isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7878  SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7879 
7880  if (Val.getNode()) {
7881  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7882  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7883  }
7884 
7885  // Try an immediate VMVN.
7886  uint64_t NegatedImm = (~SplatBits).getZExtValue();
7887  Val = isVMOVModifiedImm(
7888  NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7889  VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7890  if (Val.getNode()) {
7891  SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7892  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7893  }
7894 
7895  // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7896  if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7897  int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7898  if (ImmVal != -1) {
7899  SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7900  return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7901  }
7902  }
7903 
7904  // If we are under MVE, generate a VDUP(constant), bitcast to the original
7905  // type.
7906  if (ST->hasMVEIntegerOps() &&
7907  (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7908  EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7909  : SplatBitSize == 16 ? MVT::v8i16
7910  : MVT::v16i8;
7911  SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7912  SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7913  return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7914  }
7915  }
7916  }
7917 
7918  // Scan through the operands to see if only one value is used.
7919  //
7920  // As an optimisation, even if more than one value is used it may be more
7921  // profitable to splat with one value then change some lanes.
7922  //
7923  // Heuristically we decide to do this if the vector has a "dominant" value,
7924  // defined as splatted to more than half of the lanes.
7925  unsigned NumElts = VT.getVectorNumElements();
7926  bool isOnlyLowElement = true;
7927  bool usesOnlyOneValue = true;
7928  bool hasDominantValue = false;
7929  bool isConstant = true;
7930 
7931  // Map of the number of times a particular SDValue appears in the
7932  // element list.
7933  DenseMap<SDValue, unsigned> ValueCounts;
7934  SDValue Value;
7935  for (unsigned i = 0; i < NumElts; ++i) {
7936  SDValue V = Op.getOperand(i);
7937  if (V.isUndef())
7938  continue;
7939  if (i > 0)
7940  isOnlyLowElement = false;
7941  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7942  isConstant = false;
7943 
7944  ValueCounts.insert(std::make_pair(V, 0));
7945  unsigned &Count = ValueCounts[V];
7946 
7947  // Is this value dominant? (takes up more than half of the lanes)
7948  if (++Count > (NumElts / 2)) {
7949  hasDominantValue = true;
7950  Value = V;
7951  }
7952  }
7953  if (ValueCounts.size() != 1)
7954  usesOnlyOneValue = false;
7955  if (!Value.getNode() && !ValueCounts.empty())
7956  Value = ValueCounts.begin()->first;
7957 
7958  if (ValueCounts.empty())
7959  return DAG.getUNDEF(VT);
7960 
7961  // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
7962  // Keep going if we are hitting this case.
7963  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
7964  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
7965 
7966  unsigned EltSize = VT.getScalarSizeInBits();
7967 
7968  // Use VDUP for non-constant splats. For f32 constant splats, reduce to
7969  // i32 and try again.
7970  if (hasDominantValue && EltSize <= 32) {
7971  if (!isConstant) {
7972  SDValue N;
7973 
7974  // If we are VDUPing a value that comes directly from a vector, that will
7975  // cause an unnecessary move to and from a GPR, where instead we could
7976  // just use VDUPLANE. We can only do this if the lane being extracted
7977  // is at a constant index, as the VDUP from lane instructions only have
7978  // constant-index forms.
7979  ConstantSDNode *constIndex;
7980  if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7981  (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
7982  // We need to create a new undef vector to use for the VDUPLANE if the
7983  // size of the vector from which we get the value is different than the
7984  // size of the vector that we need to create. We will insert the element
7985  // such that the register coalescer will remove unnecessary copies.
7986  if (VT != Value->getOperand(0).getValueType()) {
7987  unsigned index = constIndex->getAPIntValue().getLimitedValue() %
7988  VT.getVectorNumElements();
7989  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7990  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
7991  Value, DAG.getConstant(index, dl, MVT::i32)),
7992  DAG.getConstant(index, dl, MVT::i32));
7993  } else
7994  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7995  Value->getOperand(0), Value->getOperand(1));
7996  } else
7997  N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
7998 
7999  if (!usesOnlyOneValue) {
8000  // The dominant value was splatted as 'N', but we now have to insert
8001  // all differing elements.
8002  for (unsigned I = 0; I < NumElts; ++I) {
8003  if (Op.getOperand(I) == Value)
8004  continue;
8006  Ops.push_back(N);
8007  Ops.push_back(Op.getOperand(I));
8008  Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8009  N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8010  }
8011  }
8012  return N;
8013  }
8016  MVT FVT = VT.getVectorElementType().getSimpleVT();
8017  assert(FVT == MVT::f32 || FVT == MVT::f16);
8018  MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8019  for (unsigned i = 0; i < NumElts; ++i)
8020  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8021  Op.getOperand(i)));
8022  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8023  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8024  Val = LowerBUILD_VECTOR(Val, DAG, ST);
8025  if (Val.getNode())
8026  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8027  }
8028  if (usesOnlyOneValue) {
8029  SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8030  if (isConstant && Val.getNode())
8031  return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8032  }
8033  }
8034 
8035  // If all elements are constants and the case above didn't get hit, fall back
8036  // to the default expansion, which will generate a load from the constant
8037  // pool.
8038  if (isConstant)
8039  return SDValue();
8040 
8041  // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8042  // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8043  // length <= 2.
8044  if (NumElts >= 4)
8045  if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8046  return shuffle;
8047 
8048  // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8049  // VCVT's
8050  if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8051  return VCVT;
8052  if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8053  return VCVT;
8054 
8055  if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8056  // If we haven't found an efficient lowering, try splitting a 128-bit vector
8057  // into two 64-bit vectors; we might discover a better way to lower it.
8058  SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8059  EVT ExtVT = VT.getVectorElementType();
8060  EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8061  SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8062  if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8063  Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8064  SDValue Upper =
8065  DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8066  if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8067  Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8068  if (Lower && Upper)
8069  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8070  }
8071 
8072  // Vectors with 32- or 64-bit elements can be built by directly assigning
8073  // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8074  // will be legalized.
8075  if (EltSize >= 32) {
8076  // Do the expansion with floating-point types, since that is what the VFP
8077  // registers are defined to use, and since i64 is not legal.
8078  EVT EltVT = EVT::getFloatingPointVT(EltSize);
8079  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8081  for (unsigned i = 0; i < NumElts; ++i)
8082  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8083  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8084  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8085  }
8086 
8087  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8088  // know the default expansion would otherwise fall back on something even
8089  // worse. For a vector with one or two non-undef values, that's
8090  // scalar_to_vector for the elements followed by a shuffle (provided the
8091  // shuffle is valid for the target) and materialization element by element
8092  // on the stack followed by a load for everything else.
8093  if (!isConstant && !usesOnlyOneValue) {
8094  SDValue Vec = DAG.getUNDEF(VT);
8095  for (unsigned i = 0 ; i < NumElts; ++i) {
8096  SDValue V = Op.getOperand(i);
8097  if (V.isUndef())
8098  continue;
8099  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8100  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8101  }
8102  return Vec;
8103  }
8104 
8105  return SDValue();
8106 }
8107 
8108 // Gather data to see if the operation can be modelled as a
8109 // shuffle in combination with VEXTs.
8110 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8111  SelectionDAG &DAG) const {
8112  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8113  SDLoc dl(Op);
8114  EVT VT = Op.getValueType();
8115  unsigned NumElts = VT.getVectorNumElements();
8116 
8117  struct ShuffleSourceInfo {
8118  SDValue Vec;
8119  unsigned MinElt = std::numeric_limits<unsigned>::max();
8120  unsigned MaxElt = 0;
8121 
8122  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8123  // be compatible with the shuffle we intend to construct. As a result
8124  // ShuffleVec will be some sliding window into the original Vec.
8125  SDValue ShuffleVec;
8126 
8127  // Code should guarantee that element i in Vec starts at element "WindowBase
8128  // + i * WindowScale in ShuffleVec".
8129  int WindowBase = 0;
8130  int WindowScale = 1;
8131 
8132  ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8133 
8134  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8135  };
8136 
8137  // First gather all vectors used as an immediate source for this BUILD_VECTOR
8138  // node.
8140  for (unsigned i = 0; i < NumElts; ++i) {
8141  SDValue V = Op.getOperand(i);
8142  if (V.isUndef())
8143  continue;
8144  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8145  // A shuffle can only come from building a vector from various
8146  // elements of other vectors.
8147  return SDValue();
8148  } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8149  // Furthermore, shuffles require a constant mask, whereas extractelts
8150  // accept variable indices.
8151  return SDValue();
8152  }
8153 
8154  // Add this element source to the list if it's not already there.
8155  SDValue SourceVec = V.getOperand(0);
8156  auto Source = llvm::find(Sources, SourceVec);
8157  if (Source == Sources.end())
8158  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8159 
8160  // Update the minimum and maximum lane number seen.
8161  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
8162  Source->MinElt = std::min(Source->MinElt, EltNo);
8163  Source->MaxElt = std::max(Source->MaxElt, EltNo);
8164  }
8165 
8166  // Currently only do something sane when at most two source vectors
8167  // are involved.
8168  if (Sources.size() > 2)
8169  return SDValue();
8170 
8171  // Find out the smallest element size among result and two sources, and use
8172  // it as element size to build the shuffle_vector.
8173  EVT SmallestEltTy = VT.getVectorElementType();
8174  for (auto &Source : Sources) {
8175  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8176  if (SrcEltTy.bitsLT(SmallestEltTy))
8177  SmallestEltTy = SrcEltTy;
8178  }
8179  unsigned ResMultiplier =
8180  VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8181  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8182  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8183 
8184  // If the source vector is too wide or too narrow, we may nevertheless be able
8185  // to construct a compatible shuffle either by concatenating it with UNDEF or
8186  // extracting a suitable range of elements.
8187  for (auto &Src : Sources) {
8188  EVT SrcVT = Src.ShuffleVec.getValueType();
8189 
8190  uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8191  uint64_t VTSize = VT.getFixedSizeInBits();
8192  if (SrcVTSize == VTSize)
8193  continue;
8194 
8195  // This stage of the search produces a source with the same element type as
8196  // the original, but with a total width matching the BUILD_VECTOR output.
8197  EVT EltVT = SrcVT.getVectorElementType();
8198  unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8199  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8200 
8201  if (SrcVTSize < VTSize) {
8202  if (2 * SrcVTSize != VTSize)
8203  return SDValue();
8204  // We can pad out the smaller vector for free, so if it's part of a
8205  // shuffle...
8206  Src.ShuffleVec =
8207  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8208  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8209  continue;
8210  }
8211 
8212  if (SrcVTSize != 2 * VTSize)
8213  return SDValue();
8214 
8215  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8216  // Span too large for a VEXT to cope
8217  return SDValue();
8218  }
8219 
8220  if (Src.MinElt >= NumSrcElts) {
8221  // The extraction can just take the second half
8222  Src.ShuffleVec =
8223  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8224  DAG.getConstant(NumSrcElts, dl, MVT::i32));
8225  Src.WindowBase = -NumSrcElts;
8226  } else if (Src.MaxElt < NumSrcElts) {
8227  // The extraction can just take the first half
8228  Src.ShuffleVec =
8229  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8230  DAG.getConstant(0, dl, MVT::i32));
8231  } else {
8232  // An actual VEXT is needed
8233  SDValue VEXTSrc1 =
8234  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8235  DAG.getConstant(0, dl, MVT::i32));
8236  SDValue VEXTSrc2 =
8237  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8238  DAG.getConstant(NumSrcElts, dl, MVT::i32));
8239 
8240  Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8241  VEXTSrc2,
8242  DAG.getConstant(Src.MinElt, dl, MVT::i32));
8243  Src.WindowBase = -Src.MinElt;
8244  }
8245  }
8246 
8247  // Another possible incompatibility occurs from the vector element types. We
8248  // can fix this by bitcasting the source vectors to the same type we intend
8249  // for the shuffle.
8250  for (auto &Src : Sources) {
8251  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8252  if (SrcEltTy == SmallestEltTy)
8253  continue;
8254  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8255  Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8256  Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8257  Src.WindowBase *= Src.WindowScale;
8258  }
8259 
8260  // Final check before we try to actually produce a shuffle.
8261  LLVM_DEBUG(for (auto Src
8262  : Sources)
8263  assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8264 
8265  // The stars all align, our next step is to produce the mask for the shuffle.
8267  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8268  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8269  SDValue Entry = Op.getOperand(i);
8270  if (Entry.isUndef())
8271  continue;
8272 
8273  auto Src = llvm::find(Sources, Entry.getOperand(0));
8274  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8275 
8276  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8277  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8278  // segment.
8279  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8280  int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8281  VT.getScalarSizeInBits());
8282  int LanesDefined = BitsDefined / BitsPerShuffleLane;
8283 
8284  // This source is expected to fill ResMultiplier lanes of the final shuffle,
8285  // starting at the appropriate offset.
8286  int *LaneMask = &Mask[i * ResMultiplier];
8287 
8288  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8289  ExtractBase += NumElts * (Src - Sources.begin());
8290  for (int j = 0; j < LanesDefined; ++j)
8291  LaneMask[j] = ExtractBase + j;
8292  }
8293 
8294 
8295  // We can't handle more than two sources. This should have already
8296  // been checked before this point.
8297  assert(Sources.size() <= 2 && "Too many sources!");
8298 
8299  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8300  for (unsigned i = 0; i < Sources.size(); ++i)
8301  ShuffleOps[i] = Sources[i].ShuffleVec;
8302 
8303  SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8304  ShuffleOps[1], Mask, DAG);
8305  if (!Shuffle)
8306  return SDValue();
8307  return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8308 }
8309 
/// Opcodes stored in the perfect-shuffle table entries (bits 29-26 of each
/// entry) describing how a 4-element shuffle is synthesized. The members
/// between OP_COPY and OP_VUZPL were lost in this span and are restored here;
/// they are referenced by isLegalMVEShuffleOp and GeneratePerfectShuffle.
enum ShuffleOpCodes {
  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
  OP_VREV,     // VREV, reverse the elements
  OP_VDUP0,    // VDUP, duplicate lane 0
  OP_VDUP1,    // VDUP, duplicate lane 1
  OP_VDUP2,    // VDUP, duplicate lane 2
  OP_VDUP3,    // VDUP, duplicate lane 3
  OP_VEXT1,    // VEXT, extract with immediate 1
  OP_VEXT2,    // VEXT, extract with immediate 2
  OP_VEXT3,    // VEXT, extract with immediate 3
  OP_VUZPL, // VUZP, left result
  OP_VUZPR, // VUZP, right result
  OP_VZIPL, // VZIP, left result
  OP_VZIPR, // VZIP, right result
  OP_VTRNL, // VTRN, left result
  OP_VTRNR  // VTRN, right result
};
8327 
8328 static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8329  unsigned OpNum = (PFEntry >> 26) & 0x0F;
8330  switch (OpNum) {
8331  case OP_COPY:
8332  case OP_VREV:
8333  case OP_VDUP0:
8334  case OP_VDUP1:
8335  case OP_VDUP2:
8336  case OP_VDUP3:
8337  return true;
8338  }
8339  return false;
8340 }
8341 
8342 /// isShuffleMaskLegal - Targets can use this to indicate that they only
8343 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8344 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8345 /// are assumed to be legal.
8347  if (VT.getVectorNumElements() == 4 &&
8348  (VT.is128BitVector() || VT.is64BitVector())) {
8349  unsigned PFIndexes[4];
8350  for (unsigned i = 0; i != 4; ++i) {
8351  if (M[i] < 0)
8352  PFIndexes[i] = 8;
8353  else
8354  PFIndexes[i] = M[i];
8355  }
8356 
8357  // Compute the index in the perfect shuffle table.
8358  unsigned PFTableIndex =
8359  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8360  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8361  unsigned Cost = (PFEntry >> 30);
8362 
8363  if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8364  return true;
8365  }
8366 
8367  bool ReverseVEXT, isV_UNDEF;
8368  unsigned Imm, WhichResult;
8369 
8370  unsigned EltSize = VT.getScalarSizeInBits();
8371  if (EltSize >= 32 ||
8374  isVREVMask(M, VT, 64) ||
8375  isVREVMask(M, VT, 32) ||
8376  isVREVMask(M, VT, 16))
8377  return true;
8378  else if (Subtarget->hasNEON() &&
8379  (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8380  isVTBLMask(M, VT) ||
8381  isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8382  return true;
8383  else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8384  isReverseMask(M, VT))
8385  return true;
8386  else if (Subtarget->hasMVEIntegerOps() &&
8387  (isVMOVNMask(M, VT, true, false) ||
8388  isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8389  return true;
8390  else if (Subtarget->hasMVEIntegerOps() &&
8391  (isTruncMask(M, VT, false, false) ||
8392  isTruncMask(M, VT, false, true) ||
8393  isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8394  return true;
8395  else
8396  return false;
8397 }
8398 
8399 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8400 /// the specified operations to build the shuffle.
8401 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8402  SDValue RHS, SelectionDAG &DAG,
8403  const SDLoc &dl) {
8404  unsigned OpNum = (PFEntry >> 26) & 0x0F;
8405  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8406  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8407 
8408  if (OpNum == OP_COPY) {
8409  if (LHSID == (1*9+2)*9+3) return LHS;
8410  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8411  return RHS;
8412  }
8413 
8414  SDValue OpLHS, OpRHS;
8415  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8416  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8417  EVT VT = OpLHS.getValueType();
8418 
8419  switch (OpNum) {
8420  default: llvm_unreachable("Unknown shuffle opcode!");
8421  case OP_VREV:
8422  // VREV divides the vector in half and swaps within the half.
8423  if (VT.getScalarSizeInBits() == 32)
8424  return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8425  // vrev <4 x i16> -> VREV32
8426  if (VT.getScalarSizeInBits() == 16)
8427  return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8428  // vrev <4 x i8> -> VREV16
8429  assert(VT.getScalarSizeInBits() == 8);
8430  return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8431  case OP_VDUP0:
8432  case OP_VDUP1:
8433  case OP_VDUP2:
8434  case OP_VDUP3:
8435  return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8436  OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8437  case OP_VEXT1:
8438  case OP_VEXT2:
8439  case OP_VEXT3:
8440  return DAG.getNode(ARMISD::VEXT, dl, VT,
8441  OpLHS, OpRHS,
8442  DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8443  case OP_VUZPL:
8444  case OP_VUZPR:
8445  return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8446  OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8447  case OP_VZIPL:
8448  case OP_VZIPR:
8449  return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8450  OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8451  case OP_VTRNL:
8452  case OP_VTRNR:
8453  return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8454  OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8455  }
8456 }
8457 
8459  ArrayRef<int> ShuffleMask,
8460  SelectionDAG &DAG) {
8461  // Check to see if we can use the VTBL instruction.
8462  SDValue V1 = Op.getOperand(0);
8463  SDValue V2 = Op.getOperand(1);
8464  SDLoc DL(Op);
8465 
8466  SmallVector<SDValue, 8> VTBLMask;
8467  for (int I : ShuffleMask)
8468  VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
8469 
8470  if (V2.getNode()->isUndef())
8471  return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8472  DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8473 
8474  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8475  DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8476 }
8477 
8479  SDLoc DL(Op);
8480  EVT VT = Op.getValueType();
8481 
8482  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8483  "Expect an v8i16/v16i8 type");
8484  SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8485  // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8486  // extract the first 8 bytes into the top double word and the last 8 bytes
8487  // into the bottom double word, through a new vector shuffle that will be
8488  // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8489  std::vector<int> NewMask;
8490  for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8491  NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8492  for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8493  NewMask.push_back(i);
8494  return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8495 }
8496 
8498  switch (VT.getSimpleVT().SimpleTy) {
8499  case MVT::v2i1:
8500  return MVT::v2f64;
8501  case MVT::v4i1:
8502  return MVT::v4i32;
8503  case MVT::v8i1:
8504  return MVT::v8i16;
8505  case MVT::v16i1:
8506  return MVT::v16i8;
8507  default:
8508  llvm_unreachable("Unexpected vector predicate type");
8509  }
8510 }
8511 
8513  SelectionDAG &DAG) {
8514  // Converting from boolean predicates to integers involves creating a vector
8515  // of all ones or all zeroes and selecting the lanes based upon the real
8516  // predicate.
8517  SDValue AllOnes =
8520 
8521  SDValue AllZeroes =
8523  AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8524 
8525  // Get full vector type from predicate type
8526  EVT NewVT = getVectorTyFromPredicateVector(VT);
8527 
8528  SDValue RecastV1;
8529  // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8530  // this to a v16i1. This cannot be done with an ordinary bitcast because the
8531  // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8532  // since we know in hardware the sizes are really the same.
8533  if (VT != MVT::v16i1)
8534  RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8535  else
8536  RecastV1 = Pred;
8537 
8538  // Select either all ones or zeroes depending upon the real predicate bits.
8539  SDValue PredAsVector =
8540  DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8541 
8542  // Recast our new predicate-as-integer v16i8 vector into something
8543  // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8544  return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8545 }
8546 
8548  const ARMSubtarget *ST) {
8549  EVT VT = Op.getValueType();
8550  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8551  ArrayRef<int> ShuffleMask = SVN->getMask();
8552 
8553  assert(ST->hasMVEIntegerOps() &&
8554  "No support for vector shuffle of boolean predicates");
8555 
8556  SDValue V1 = Op.getOperand(0);
8557  SDValue V2 = Op.getOperand(1);
8558  SDLoc dl(Op);
8559  if (isReverseMask(ShuffleMask, VT)) {
8561  SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8562  SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8563  DAG.getConstant(16, dl, MVT::i32));
8564  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8565  }
8566 
8567  // Until we can come up with optimised cases for every single vector
8568  // shuffle in existence we have chosen the least painful strategy. This is
8569  // to essentially promote the boolean predicate to a 8-bit integer, where
8570  // each predicate represents a byte. Then we fall back on a normal integer
8571  // vector shuffle and convert the result back into a predicate vector. In
8572  // many cases the generated code might be even better than scalar code
8573  // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8574  // fields in a register into 8 other arbitrary 2-bit fields!
8575  SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8576  EVT NewVT = PredAsVector1.getValueType();
8577  SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8578  : PromoteMVEPredVector(dl, V2, VT, DAG);
8579  assert(PredAsVector2.getValueType() == NewVT &&
8580  "Expected identical vector type in expanded i1 shuffle!");
8581 
8582  // Do the shuffle!
8583  SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8584  PredAsVector2, ShuffleMask);
8585 
8586  // Now return the result of comparing the shuffled vector with zero,
8587  // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8588  // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8589  if (VT == MVT::v2i1) {
8590  SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8591  SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8592  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8593  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8594  }
8595  return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8596  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8597 }
8598 
8600  ArrayRef<int> ShuffleMask,
8601  SelectionDAG &DAG) {
8602  // Attempt to lower the vector shuffle using as many whole register movs as
8603  // possible. This is useful for types smaller than 32bits, which would
8604  // often otherwise become a series for grp movs.
8605  SDLoc dl(Op);
8606  EVT VT = Op.getValueType();
8607  if (VT.getScalarSizeInBits() >= 32)
8608  return SDValue();
8609 
8610  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8611  "Unexpected vector type");
8612  int NumElts = VT.getVectorNumElements();
8613  int QuarterSize = NumElts / 4;
8614  // The four final parts of the vector, as i32's
8615  SDValue Parts[4];
8616 
8617  // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8618  // <u,u,u,u>), returning the vmov lane index
8619  auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8620  // Detect which mov lane this would be from the first non-undef element.
8621  int MovIdx = -1;
8622  for (int i = 0; i < Length; i++) {
8623  if (ShuffleMask[Start + i] >= 0) {
8624  if (ShuffleMask[Start + i] % Length != i)
8625  return -1;
8626  MovIdx = ShuffleMask[Start + i] / Length;
8627  break;
8628  }
8629  }
8630  // If all items are undef, leave this for other combines
8631  if (MovIdx == -1)
8632  return -1;
8633  // Check the remaining values are the correct part of the same mov
8634  for (int i = 1; i < Length; i++) {
8635  if (ShuffleMask[Start + i] >= 0 &&
8636  (ShuffleMask[Start + i] / Length != MovIdx ||
8637  ShuffleMask[Start + i] % Length != i))
8638  return -1;
8639  }
8640  return MovIdx;
8641  };
8642 
8643  for (int Part = 0; Part < 4; ++Part) {
8644  // Does this part look like a mov
8645  int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8646  if (Elt != -1) {
8647  SDValue Input = Op->getOperand(0);
8648  if (Elt >= 4) {
8649  Input = Op->getOperand(1);
8650  Elt -= 4;
8651  }
8652  SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8653  Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8654  DAG.getConstant(Elt, dl, MVT::i32));
8655  }
8656  }
8657 
8658  // Nothing interesting found, just return
8659  if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8660  return SDValue();
8661 
8662  // The other parts need to be built with the old shuffle vector, cast to a
8663  // v4i32 and extract_vector_elts
8664  if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8665  SmallVector<int, 16> NewShuffleMask;
8666  for (int Part = 0; Part < 4; ++Part)
8667  for (int i = 0; i < QuarterSize; i++)
8668  NewShuffleMask.push_back(
8669  Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8670  SDValue NewShuffle = DAG.getVectorShuffle(
8671  VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8672  SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8673 
8674  for (int Part = 0; Part < 4; ++Part)
8675  if (!Parts[Part])
8676  Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8677  BitCast, DAG.getConstant(Part, dl, MVT::i32));
8678  }
8679  // Build a vector out of the various parts and bitcast it back to the original
8680  // type.
8681  SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8682  return DAG.getBitcast(VT, NewVec);
8683 }
8684 
8686  ArrayRef<int> ShuffleMask,
8687  SelectionDAG &DAG) {
8688  SDValue V1 = Op.getOperand(0);
8689  SDValue V2 = Op.getOperand(1);
8690  EVT VT = Op.getValueType();
8691  unsigned NumElts = VT.getVectorNumElements();
8692 
8693  // An One-Off Identity mask is one that is mostly an identity mask from as
8694  // single source but contains a single element out-of-place, either from a
8695  // different vector or from another position in the same vector. As opposed to
8696  // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert
8697  // pair directly.
8698  auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8699  int &OffElement) {
8700  OffElement = -1;
8701  int NonUndef = 0;
8702  for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8703  if (Mask[i] == -1)
8704  continue;
8705  NonUndef++;
8706  if (Mask[i] != i + BaseOffset) {
8707  if (OffElement == -1)
8708  OffElement = i;
8709  else
8710  return false;
8711  }
8712  }
8713  return NonUndef > 2 && OffElement != -1;
8714  };
8715  int OffElement;
8716  SDValue VInput;
8717  if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8718  VInput = V1;
8719  else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8720  VInput = V2;
8721  else
8722  return SDValue();
8723 
8724  SDLoc dl(Op);
8725  EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8726  ? MVT::i32
8727  : VT.getScalarType();
8728  SDValue Elt = DAG.getNode(
8729  ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8730  ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8731  DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8732  return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8733  DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8734 }
8735 
8737  const ARMSubtarget *ST) {
8738  SDValue V1 = Op.getOperand(0);
8739  SDValue V2 = Op.getOperand(1);
8740  SDLoc dl(Op);
8741  EVT VT = Op.getValueType();
8742  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8743  unsigned EltSize = VT.getScalarSizeInBits();
8744 
8745  if (ST->hasMVEIntegerOps() && EltSize == 1)
8746  return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8747 
8748  // Convert shuffles that are directly supported on NEON to target-specific
8749  // DAG nodes, instead of keeping them as shuffles and matching them again
8750  // during code selection. This is more efficient and avoids the possibility
8751  // of inconsistencies between legalization and selection.
8752  // FIXME: floating-point vectors should be canonicalized to integer vectors
8753  // of the same time so that they get CSEd properly.
8754  ArrayRef<int> ShuffleMask = SVN->getMask();
8755 
8756  if (EltSize <= 32) {
8757  if (SVN->isSplat()) {
8758  int Lane = SVN->getSplatIndex();
8759  // If this is undef splat, generate it via "just" vdup, if possible.
8760  if (Lane == -1) Lane = 0;
8761 
8762  // Test if V1 is a SCALAR_TO_VECTOR.
8763  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8764  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8765  }
8766  // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8767  // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8768  // reaches it).
8769  if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8770  !isa<ConstantSDNode>(V1.getOperand(0))) {
8771  bool IsScalarToVector = true;
8772  for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8773  if (!V1.getOperand(i).isUndef()) {
8774  IsScalarToVector = false;
8775  break;
8776  }
8777  if (IsScalarToVector)
8778  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8779  }
8780  return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8781  DAG.getConstant(Lane, dl, MVT::i32));
8782  }
8783 
8784  bool ReverseVEXT = false;
8785  unsigned Imm = 0;
8786  if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8787  if (ReverseVEXT)
8788  std::swap(V1, V2);
8789  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8790  DAG.getConstant(Imm, dl, MVT::i32));
8791  }
8792 
8793  if (isVREVMask(ShuffleMask, VT, 64))
8794  return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8795  if (isVREVMask(ShuffleMask, VT, 32))
8796  return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8797  if (isVREVMask(ShuffleMask, VT, 16))
8798  return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8799 
8800  if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8801  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8802  DAG.getConstant(Imm, dl, MVT::i32));
8803  }
8804 
8805  // Check for Neon shuffles that modify both input vectors in place.
8806  // If both results are used, i.e., if there are two shuffles with the same
8807  // source operands and with masks corresponding to both results of one of
8808  // these operations, DAG memoization will ensure that a single node is
8809  // used for both shuffles.
8810  unsigned WhichResult = 0;
8811  bool isV_UNDEF = false;
8812  if (ST->hasNEON()) {
8813  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8814  ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8815  if (isV_UNDEF)
8816  V2 = V1;
8817  return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8818  .getValue(WhichResult);
8819  }
8820  }
8821  if (ST->hasMVEIntegerOps()) {
8822  if (isVMOVNMask(ShuffleMask, VT, false, false))
8823  return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8824  DAG.getConstant(0, dl, MVT::i32));
8825  if (isVMOVNMask(ShuffleMask, VT, true, false))
8826  return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8827  DAG.getConstant(1, dl, MVT::i32));
8828  if (isVMOVNMask(ShuffleMask, VT, true, true))
8829  return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8830  DAG.getConstant(1, dl, MVT::i32));
8831  }
8832 
8833  // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8834  // shuffles that produce a result larger than their operands with:
8835  // shuffle(concat(v1, undef), concat(v2, undef))
8836  // ->
8837  // shuffle(concat(v1, v2), undef)
8838  // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8839  //
8840  // This is useful in the general case, but there are special cases where
8841  // native shuffles produce larger results: the two-result ops.
8842  //
8843  // Look through the concat when lowering them:
8844  // shuffle(concat(v1, v2), undef)
8845  // ->
8846  // concat(VZIP(v1, v2):0, :1)
8847  //
8848  if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8849  SDValue SubV1 = V1->getOperand(0);
8850  SDValue SubV2 = V1->getOperand(1);
8851  EVT SubVT = SubV1.getValueType();
8852 
8853  // We expect these to have been canonicalized to -1.
8854  assert(llvm::all_of(ShuffleMask, [&](int i) {
8855  return i < (int)VT.getVectorNumElements();
8856  }) && "Unexpected shuffle index into UNDEF operand!");
8857 
8858  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8859  ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8860  if (isV_UNDEF)
8861  SubV2 = SubV1;
8862  assert((WhichResult == 0) &&
8863  "In-place shuffle of concat can only have one result!");
8864  SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8865  SubV1, SubV2);
8866  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8867  Res.getValue(1));
8868  }
8869  }
8870  }
8871 
8872  if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8873  if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8874  return V;
8875 
8876  for (bool Top : {false, true}) {
8877  for (bool SingleSource : {false, true}) {
8878  if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8879  MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8880  MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8881  SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8882  SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8883  SingleSource ? V1 : V2);
8884  if (Top) {
8885  SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8886  Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8887  Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8888  }
8889  return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8890  }
8891  }
8892  }
8893  }
8894 
8895  // If the shuffle is not directly supported and it has 4 elements, use
8896  // the PerfectShuffle-generated table to synthesize it from other shuffles.
8897  unsigned NumElts = VT.getVectorNumElements();
8898  if (NumElts == 4) {
8899  unsigned PFIndexes[4];
8900  for (unsigned i = 0; i != 4; ++i) {
8901  if (ShuffleMask[i] < 0)
8902  PFIndexes[i] = 8;
8903  else
8904  PFIndexes[i] = ShuffleMask[i];
8905  }
8906 
8907  // Compute the index in the perfect shuffle table.
8908  unsigned PFTableIndex =
8909  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8910  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8911  unsigned Cost = (PFEntry >> 30);
8912 
8913  if (Cost <= 4) {
8914  if (ST->hasNEON())
8915  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8916  else if (isLegalMVEShuffleOp(PFEntry)) {
8917  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8918  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8919  unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8920  unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8921  if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8922  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8923  }
8924  }
8925  }
8926 
8927  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8928  if (EltSize >= 32) {
8929  // Do the expansion with floating-point types, since that is what the VFP
8930  // registers are defined to use, and since i64 is not legal.
8931  EVT EltVT = EVT::getFloatingPointVT(EltSize);
8932  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8933  V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8934  V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8936  for (unsigned i = 0; i < NumElts; ++i) {
8937  if (ShuffleMask[i] < 0)
8938  Ops.push_back(DAG.getUNDEF(EltVT));
8939  else
8940  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8941  ShuffleMask[i] < (int)NumElts ? V1 : V2,
8942  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8943  dl, MVT::i32)));
8944  }
8945  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8946  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8947  }
8948 
8949  if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8950  isReverseMask(ShuffleMask, VT))
8951  return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
8952 
8953  if (ST->hasNEON() && VT == MVT::v8i8)
8954  if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
8955  return NewOp;
8956 
8957  if (ST->hasMVEIntegerOps())
8958  if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
8959  return NewOp;
8960 
8961  return SDValue();
8962 }
8963 
8965  const ARMSubtarget *ST) {
8966  EVT VecVT = Op.getOperand(0).getValueType();
8967  SDLoc dl(Op);
8968 
8969  assert(ST->hasMVEIntegerOps() &&
8970  "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8971 
8972  SDValue Conv =
8973  DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8974  unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
8975  unsigned LaneWidth =
8977  unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
8979  Op.getOperand(1), DAG.getValueType(MVT::i1));
8980  SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
8981  DAG.getConstant(~Mask, dl, MVT::i32));
8982  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
8983 }
8984 
8985 SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8986  SelectionDAG &DAG) const {
8987  // INSERT_VECTOR_ELT is legal only for immediate indexes.
8988  SDValue Lane = Op.getOperand(2);
8989  if (!isa<ConstantSDNode>(Lane))
8990  return SDValue();
8991 
8992  SDValue Elt = Op.getOperand(1);
8993  EVT EltVT = Elt.getValueType();
8994 
8995  if (Subtarget->hasMVEIntegerOps() &&
8996  Op.getValueType().getScalarSizeInBits() == 1)
8997  return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
8998 
8999  if (getTypeAction(*DAG.getContext(), EltVT) ==
9001  // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9002  // but the type system will try to do that if we don't intervene.
9003  // Reinterpret any such vector-element insertion as one with the
9004  // corresponding integer types.
9005 
9006  SDLoc dl(Op);
9007 
9008  EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9009  assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9011 
9012  SDValue VecIn = Op.getOperand(0);
9013  EVT VecVT = VecIn.getValueType();
9014  EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9015  VecVT.getVectorNumElements());
9016 
9017  SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9018  SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9019  SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9020  IVecIn, IElt, Lane);
9021  return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9022  }
9023 
9024  return Op;
9025 }
9026 
9028  const ARMSubtarget *ST) {
9029  EVT VecVT = Op.getOperand(0).getValueType();
9030  SDLoc dl(Op);
9031 
9032  assert(ST->hasMVEIntegerOps() &&
9033  "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9034 
9035  SDValue Conv =
9036  DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9037  unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
9038  unsigned LaneWidth =
9040  SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9041  DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9042  return Shift;
9043 }
9044 
9046  const ARMSubtarget *ST) {
9047  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9048  SDValue Lane = Op.getOperand(1);
9049  if (!isa<ConstantSDNode>(Lane))
9050  return SDValue();
9051 
9052  SDValue Vec = Op.getOperand(0);
9053  EVT VT = Vec.getValueType();
9054 
9055  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9056  return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9057 
9058  if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9059  SDLoc dl(Op);
9060  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9061  }
9062 
9063  return Op;
9064 }
9065 
9067  const ARMSubtarget *ST) {
9068  SDLoc dl(Op);
9069  assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9070  "Unexpected custom CONCAT_VECTORS lowering");
9071  assert(isPowerOf2_32(Op.getNumOperands()) &&
9072  "Unexpected custom CONCAT_VECTORS lowering");
9073  assert(ST->hasMVEIntegerOps() &&
9074  "CONCAT_VECTORS lowering only supported for MVE");
9075 
9076  auto ConcatPair = [&](SDValue V1, SDValue V2) {
9077  EVT Op1VT = V1.getValueType();
9078  EVT Op2VT = V2.getValueType();
9079  assert(Op1VT == Op2VT && "Operand types don't match!");
9080  EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9081 
9082  SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9083  SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9084 
9085  // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9086  // promoted to v8i16, etc.
9087  MVT ElType =
9089  unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9090 
9091  // Extract the vector elements from Op1 and Op2 one by one and truncate them
9092  // to be the right size for the destination. For example, if Op1 is v4i1
9093  // then the promoted vector is v4i32. The result of concatenation gives a
9094  // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9095  // needs truncating to i16 and inserting in the result.
9096  EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9097  SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9098  auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9099  EVT NewVT = NewV.getValueType();
9100  EVT ConcatVT = ConVec.getValueType();
9101  for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9102  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9103  DAG.getIntPtrConstant(i, dl));
9104  ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9105  DAG.getConstant(j, dl, MVT::i32));
9106  }
9107  return ConVec;
9108  };
9109  unsigned j = 0;
9110  ConVec = ExtractInto(NewV1, ConVec, j);
9111  ConVec = ExtractInto(NewV2, ConVec, j);
9112 
9113  // Now return the result of comparing the subvector with zero, which will
9114  // generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1 we
9115  // convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
9116  if (VT == MVT::v2i1) {
9117  SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, ConVec);
9118  SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
9119  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9120  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9121  }
9122  return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9123  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9124  };
9125 
9126  // Concat each pair of subvectors and pack into the lower half of the array.
9127  SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9128  while (ConcatOps.size() > 1) {
9129  for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9130  SDValue V1 = ConcatOps[I];
9131  SDValue V2 = ConcatOps[I + 1];
9132  ConcatOps[I / 2] = ConcatPair(V1, V2);
9133  }
9134  ConcatOps.resize(ConcatOps.size() / 2);
9135  }
9136  return ConcatOps[0];
9137 }
9138 
9140  const ARMSubtarget *ST) {
9141  EVT VT = Op->getValueType(0);
9142  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9143  return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9144 
9145  // The only time a CONCAT_VECTORS operation can have legal types is when
9146  // two 64-bit vectors are concatenated to a 128-bit vector.
9147  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9148  "unexpected CONCAT_VECTORS");
9149  SDLoc dl(Op);
9150  SDValue Val = DAG.getUNDEF(MVT::v2f64);
9151  SDValue Op0 = Op.getOperand(0);
9152  SDValue Op1 = Op.getOperand(1);
9153  if (!Op0.isUndef())
9154  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9155  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9156  DAG.getIntPtrConstant(0, dl));
9157  if (!Op1.isUndef())
9158  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9159  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9160  DAG.getIntPtrConstant(1, dl));
9161  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9162 }
9163 
9165  const ARMSubtarget *ST) {
9166  SDValue V1 = Op.getOperand(0);
9167  SDValue V2 = Op.getOperand(1);
9168  SDLoc dl(Op);
9169  EVT VT = Op.getValueType();
9170  EVT Op1VT = V1.getValueType();
9171  unsigned NumElts = VT.getVectorNumElements();
9172  unsigned Index = cast<ConstantSDNode>(V2)->getZExtValue();
9173 
9174  assert(VT.getScalarSizeInBits() == 1 &&
9175  "Unexpected custom EXTRACT_SUBVECTOR lowering");
9176  assert(ST->hasMVEIntegerOps() &&
9177  "EXTRACT_SUBVECTOR lowering only supported for MVE");
9178 
9179  SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9180 
9181  // We now have Op1 promoted to a vector of integers, where v8i1 gets
9182  // promoted to v8i16, etc.
9183 
9185 
9186  if (NumElts == 2) {
9187  EVT SubVT = MVT::v4i32;
9188  SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9189  for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9190  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9191  DAG.getIntPtrConstant(i, dl));
9192  SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9193  DAG.getConstant(j, dl, MVT::i32));
9194  SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9195  DAG.getConstant(j + 1, dl, MVT::i32));
9196  }
9197  SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9198  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9199  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9200  }
9201 
9202  EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9203  SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9204  for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9205  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9206  DAG.getIntPtrConstant(i, dl));
9207  SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9208  DAG.getConstant(j, dl, MVT::i32));
9209  }
9210 
9211  // Now return the result of comparing the subvector with zero,
9212  // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9213  return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9214  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9215 }
9216 
9217 // Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9219  const ARMSubtarget *ST) {
9220  assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9221  EVT VT = N->getValueType(0);
9222  assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9223  "Expected a vector i1 type!");
9224  SDValue Op = N->getOperand(0);
9225  EVT FromVT = Op.getValueType();
9226  SDLoc DL(N);
9227 
9228  SDValue And =
9229  DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9230  return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9231  DAG.getCondCode(ISD::SETNE));
9232 }
9233 
9235  const ARMSubtarget *Subtarget) {
9236  if (!Subtarget->hasMVEIntegerOps())
9237  return SDValue();
9238 
9239  EVT ToVT = N->getValueType(0);
9240  if (ToVT.getScalarType() == MVT::i1)
9241  return LowerTruncatei1(N, DAG, Subtarget);
9242 
9243  // MVE does not have a single instruction to perform the truncation of a v4i32
9244  // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9245  // Most of the instructions in MVE follow the 'Beats' system, where moving
9246  // values from different lanes is usually something that the instructions
9247  // avoid.
9248  //
9249  // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9250  // which take a the top/bottom half of a larger lane and extend it (or do the
9251  // opposite, truncating into the top/bottom lane from a larger lane). Note
9252  // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9253  // bottom 16bits from each vector lane. This works really well with T/B
9254  // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9255  // to move order.
9256  //
9257  // But truncates and sext/zext are always going to be fairly common from llvm.
9258  // We have several options for how to deal with them:
9259  // - Wherever possible combine them into an instruction that makes them
9260  // "free". This includes loads/stores, which can perform the trunc as part
9261  // of the memory operation. Or certain shuffles that can be turned into
9262  // VMOVN/VMOVL.
9263  // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9264  // trunc(mul(sext(a), sext(b))) may become
9265  // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9266  // this case can use VMULL). This is performed in the
9267  // MVELaneInterleavingPass.
9268  // - Otherwise we have an option. By default we would expand the
9269  // zext/sext/trunc into a series of lane extract/inserts going via GPR
9270  // registers. One for each vector lane in the vector. This can obviously be
9271  // very expensive.
9272  // - The other option is to use the fact that loads/store can extend/truncate
9273  // to turn a trunc into two truncating stack stores and a stack reload. This
9274  // becomes 3 back-to-back memory operations, but at least that is less than
9275  // all the insert/extracts.
9276  //
9277  // In order to do the last, we convert certain trunc's into MVETRUNC, which
9278  // are either optimized where they can be, or eventually lowered into stack
9279  // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9280  // two early, where other instructions would be better, and stops us from
9281  // having to reconstruct multiple buildvector shuffles into loads/stores.
9282  if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9283  return SDValue();
9284  EVT FromVT = N->getOperand(0).getValueType();
9285  if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9286  return SDValue();
9287 
9288  SDValue Lo, Hi;
9289  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9290  SDLoc DL(N);
9291  return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9292 }
9293 
9295  const ARMSubtarget *Subtarget) {
9296  if (!Subtarget->hasMVEIntegerOps())
9297  return SDValue();
9298 
9299  // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9300 
9301  EVT ToVT = N->getValueType(0);
9302  if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9303  return SDValue();
9304  SDValue Op = N->getOperand(0);
9305  EVT FromVT = Op.getValueType();
9306  if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9307  return SDValue();
9308 
9309  SDLoc DL(N);
9310  EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9311  if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9312  ExtVT = MVT::v8i16;
9313 
9314  unsigned Opcode =
9315  N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9316  SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9317  SDValue Ext1 = Ext.getValue(1);
9318 
9319  if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9320  Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9321  Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9322  }
9323 
9324  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9325 }
9326 
9327 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9328 /// element has been zero/sign-extended, depending on the isSigned parameter,
9329 /// from an integer type half its size.
9331  bool isSigned) {
9332  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9333  EVT VT = N->getValueType(0);
9334  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9335  SDNode *BVN = N->getOperand(0).getNode();
9336  if (BVN->getValueType(0) != MVT::v4i32 ||
9337  BVN->getOpcode() != ISD::BUILD_VECTOR)
9338  return false;
9339  unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9340  unsigned HiElt = 1 - LoElt;
9341  ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9342  ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9343  ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9344  ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9345  if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9346  return false;
9347  if (isSigned) {
9348  if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9349  Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9350  return true;
9351  } else {
9352  if (Hi0->isZero() && Hi1->isZero())
9353  return true;
9354  }
9355  return false;
9356  }
9357 
9358  if (N->getOpcode() != ISD::BUILD_VECTOR)
9359  return false;
9360 
9361  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9362  SDNode *Elt = N->getOperand(i).getNode();
9363  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9364  unsigned EltSize = VT.getScalarSizeInBits();
9365  unsigned HalfSize = EltSize / 2;
9366  if (isSigned) {
9367  if (!isIntN(HalfSize, C->getSExtValue()))
9368  return false;
9369  } else {
9370  if (!isUIntN(HalfSize, C->getZExtValue()))
9371  return false;
9372  }
9373  continue;
9374  }
9375  return false;
9376  }
9377 
9378  return true;
9379 }
9380 
9381 /// isSignExtended - Check if a node is a vector value that is sign-extended
9382 /// or a constant BUILD_VECTOR with sign-extended elements.
9383 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9384  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9385  return true;
9386  if (isExtendedBUILD_VECTOR(N, DAG, true))
9387  return true;
9388  return false;
9389 }
9390 
9391 /// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9392 /// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9393 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9394  if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9395  ISD::isZEXTLoad(N))
9396  return true;
9397  if (isExtendedBUILD_VECTOR(N, DAG, false))
9398  return true;
9399  return false;
9400 }
9401 
9402 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9403  if (OrigVT.getSizeInBits() >= 64)
9404  return OrigVT;
9405 
9406  assert(OrigVT.isSimple() && "Expecting a simple value type");
9407 
9408  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9409  switch (OrigSimpleTy) {
9410  default: llvm_unreachable("Unexpected Vector Type");
9411  case MVT::v2i8:
9412  case MVT::v2i16:
9413  return MVT::v2i32;
9414  case MVT::v4i8:
9415  return MVT::v4i16;
9416  }
9417 }
9418 
9419 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9420 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9421 /// We insert the required extension here to get the vector to fill a D register.
9423  const EVT &OrigTy,
9424  const EVT &ExtTy,
9425  unsigned ExtOpcode) {
9426  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9427  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9428  // 64-bits we need to insert a new extension so that it will be 64-bits.
9429  assert(ExtTy.is128BitVector() && "Unexpected extension size");
9430  if (OrigTy.getSizeInBits() >= 64)
9431  return N;
9432 
9433  // Must extend size to at least 64 bits to be used as an operand for VMULL.
9434  EVT NewVT = getExtensionTo64Bits(OrigTy);
9435 
9436  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9437 }
9438 
9439 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
9440 /// does not do any sign/zero extension. If the original vector is less
9441 /// than 64 bits, an appropriate extension will be added after the load to
9442 /// reach a total size of 64 bits. We have to add the extension separately
9443 /// because ARM does not have a sign/zero extending load for vectors.
9445  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9446 
9447  // The load already has the right type.
9448  if (ExtendedTy == LD->getMemoryVT())
9449  return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9450  LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9451  LD->getMemOperand()->getFlags());
9452 
9453  // We need to create a zextload/sextload. We cannot just create a load
9454  // followed by a zext/zext node because LowerMUL is also run during normal
9455  // operation legalization where we can't create illegal types.
9456  return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9457  LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9458  LD->getMemoryVT(), LD->getAlign(),
9459  LD->getMemOperand()->getFlags());
9460 }
9461 
9462 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9463 /// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9464 /// the unextended value. The unextended vector should be 64 bits so that it can
9465 /// be used as an operand to a VMULL instruction. If the original vector size
9466 /// before extension is less than 64 bits we add a an extension to resize
9467 /// the vector to 64 bits.
9469  if (N->getOpcode() == ISD::SIGN_EXTEND ||
9470  N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9471  return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9472  N->getOperand(0)->getValueType(0),
9473  N->getValueType(0),
9474  N->getOpcode());
9475 
9476  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9478  "Expected extending load");
9479 
9480  SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9481  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9482  unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9483  SDValue extLoad =
9484  DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9485  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9486 
9487  return newLoad;
9488  }
9489 
9490  // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9491  // have been legalized as a BITCAST from v4i32.
9492  if (N->getOpcode() == ISD::BITCAST) {
9493  SDNode *BVN = N->getOperand(0).getNode();
9494  assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9495  BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9496  unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9497  return DAG.getBuildVector(
9498  MVT::v2i32, SDLoc(N),
9499  {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9500  }
9501  // Construct a new BUILD_VECTOR with elements truncated to half the size.
9502  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9503  EVT VT = N->getValueType(0);
9504  unsigned EltSize = VT.getScalarSizeInBits() / 2;
9505  unsigned NumElts = VT.getVectorNumElements();
9506  MVT TruncVT = MVT::getIntegerVT(EltSize);
9508  SDLoc dl(N);
9509  for (unsigned i = 0; i != NumElts; ++i) {
9510  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
9511  const APInt &CInt = C->getAPIntValue();
9512  // Element types smaller than 32 bits are not legal, so use i32 elements.
9513  // The values are implicitly truncated so sext vs. zext doesn't matter.
9514  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9515  }
9516  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9517 }
9518 
9519 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9520  unsigned Opcode = N->getOpcode();
9521  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9522  SDNode *N0 = N->getOperand(0).getNode();
9523  SDNode *N1 = N->getOperand(1).getNode();
9524  return N0->hasOneUse() && N1->hasOneUse() &&
9525  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9526  }
9527  return false;
9528 }
9529 
9530 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9531  unsigned Opcode = N->getOpcode();
9532  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9533  SDNode *N0 = N->getOperand(0).getNode();
9534  SDNode *N1 = N->getOperand(1).getNode();
9535  return N0->hasOneUse() && N1->hasOneUse() &&
9536  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9537  }
9538  return false;
9539 }
9540 
9542  // Multiplications are only custom-lowered for 128-bit vectors so that
9543  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9544  EVT VT = Op.getValueType();
9545  assert(VT.is128BitVector() && VT.isInteger() &&
9546  "unexpected type for custom-lowering ISD::MUL");
9547  SDNode *N0 = Op.getOperand(0).getNode();
9548  SDNode *N1 = Op.getOperand(1).getNode();
9549  unsigned NewOpc = 0;
9550  bool isMLA = false;
9551  bool isN0SExt = isSignExtended(N0, DAG);
9552  bool isN1SExt = isSignExtended(N1, DAG);
9553  if (isN0SExt && isN1SExt)
9554  NewOpc = ARMISD::VMULLs;
9555  else {
9556  bool isN0ZExt = isZeroExtended(N0, DAG);
9557  bool isN1ZExt = isZeroExtended(N1, DAG);
9558  if (isN0ZExt && isN1ZExt)
9559  NewOpc = ARMISD::VMULLu;
9560  else if (isN1SExt || isN1ZExt) {
9561  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9562  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9563  if (isN1SExt && isAddSubSExt(N0, DAG)) {
9564  NewOpc = ARMISD::VMULLs;
9565  isMLA = true;
9566  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9567  NewOpc = ARMISD::VMULLu;
9568  isMLA = true;
9569  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9570  std::swap(N0, N1);
9571  NewOpc = ARMISD::VMULLu;
9572  isMLA = true;
9573  }
9574  }
9575 
9576  if (!NewOpc) {
9577  if (VT == MVT::v2i64)
9578  // Fall through to expand this. It is not legal.
9579  return SDValue();
9580  else
9581  // Other vector multiplications are legal.
9582  return Op;
9583  }
9584  }
9585 
9586  // Legalize to a VMULL instruction.
9587  SDLoc DL(Op);
9588  SDValue Op0;
9589  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9590  if (!isMLA) {
9591  Op0 = SkipExtensionForVMULL(N0, DAG);
9592  assert(Op0.getValueType().is64BitVector() &&
9593  Op1.getValueType().is64BitVector() &&
9594  "unexpected types for extended operands to VMULL");
9595  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9596  }
9597 
9598  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9599  // isel lowering to take advantage of no-stall back to back vmul + vmla.
9600  // vmull q0, d4, d6
9601  // vmlal q0, d5, d6
9602  // is faster than
9603  // vaddl q0, d4, d5
9604  // vmovl q1, d6
9605  // vmul q0, q0, q1
9606  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9607  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9608  EVT Op1VT = Op1.getValueType();
9609  return DAG.getNode(N0->getOpcode(), DL, VT,
9610  DAG.getNode(NewOpc, DL, VT,
9611  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9612  DAG.getNode(NewOpc, DL, VT,
9613  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9614 }
9615 
// LowerSDIV_v4i8: signed divide of two v4i16 vectors whose lanes hold i8-range
// values, via float reciprocal estimate with no Newton refinement (the small
// i8 range makes the estimate plus a fixed bias sufficient).
// NOTE(review): extraction dropped line 9616 here -- presumably the signature
// "static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,".
// Confirm against upstream ARMISelLowering.cpp.
9617  SelectionDAG &DAG) {
9618  // TODO: Should this propagate fast-math-flags?
9619 
9620  // Convert to float
9621  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9622  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9623  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9624  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9625  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9626  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9627  // Get reciprocal estimate.
9628  // float4 recip = vrecpeq_f32(yf);
// NOTE(review): extraction dropped line 9629 -- presumably
// "Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32," (the vrecpe
// intrinsic call whose operands follow). Confirm against upstream.
9630  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9631  Y);
9632  // Because char has a smaller range than uchar, we can actually get away
9633  // without any newton steps. This requires that we use a weird bias
9634  // of 0xb000, however (again, this has been exhaustively tested).
9635  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9636  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9637  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
// The bias is added in the integer domain (bitcast float -> int -> float).
9638  Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9639  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9640  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9641  // Convert back to short.
9642  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9643  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9644  return X;
9645 }
9646 
// LowerSDIV_v4i16: signed v4i16 divide via float reciprocal estimate with one
// Newton refinement step (vrecpe + vrecps), plus a small integer-domain bias.
9647 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9648  SelectionDAG &DAG) {
9649  // TODO: Should this propagate fast-math-flags?
9650 
9651  SDValue N2;
9652  // Convert to float.
9653  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9654  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9655  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9656  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9657  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9658  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9659 
9660  // Use reciprocal estimate and one refinement step.
9661  // float4 recip = vrecpeq_f32(yf);
9662  // recip *= vrecpsq_f32(yf, recip);
// NOTE(review): extraction dropped line 9663 -- presumably
// "N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,". Confirm
// against upstream ARMISelLowering.cpp.
9664  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9665  N1);
// NOTE(review): extraction dropped line 9666 -- presumably the matching
// "N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32," for vrecps.
9667  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9668  N1, N2);
9669  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9670  // Because short has a smaller range than ushort, we can actually get away
9671  // with only a single newton step. This requires that we use a weird bias
9672  // of 89, however (again, this has been exhaustively tested).
9673  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9674  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9675  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
// Apply the 0x89 bias in the integer domain before converting back.
9676  N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9677  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9678  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9679  // Convert back to integer and return.
9680  // return vmovn_s32(vcvt_s32_f32(result));
9681  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9682  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9683  return N0;
9684 }
9685 
// LowerSDIV: custom lowering of ISD::SDIV for v4i16 and v8i8. v8i8 is widened
// to v8i16, split into two v4i16 halves, each divided by LowerSDIV_v4i8, then
// re-concatenated and truncated. v4i16 goes straight to LowerSDIV_v4i16.
// NOTE(review): extraction dropped line 9686 -- presumably the signature
// "static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,". Confirm upstream.
9687  const ARMSubtarget *ST) {
9688  EVT VT = Op.getValueType();
9689  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9690  "unexpected type for custom-lowering ISD::SDIV");
9691 
9692  SDLoc dl(Op);
9693  SDValue N0 = Op.getOperand(0);
9694  SDValue N1 = Op.getOperand(1);
9695  SDValue N2, N3;
9696 
9697  if (VT == MVT::v8i8) {
// Sign-extend both operands to v8i16, then split each into high (lanes 4-7)
// and low (lanes 0-3) v4i16 subvectors.
9698  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9699  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9700 
9701  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9702  DAG.getIntPtrConstant(4, dl));
9703  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9704  DAG.getIntPtrConstant(4, dl));
9705  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9706  DAG.getIntPtrConstant(0, dl));
9707  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9708  DAG.getIntPtrConstant(0, dl));
9709 
9710  N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9711  N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9712 
// Rejoin the two halves and narrow back to the original v8i8 type.
9713  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9714  N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9715 
9716  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9717  return N0;
9718  }
9719  return LowerSDIV_v4i16(N0, N1, dl, DAG);
9720 }
9721 
// LowerUDIV: custom lowering of ISD::UDIV for v4i16 and v8i8. The v8i8 path
// zero-extends, splits, and reuses the signed v4i16 divider (valid because the
// zero-extended values fit the signed range), then saturates back with
// vqmovnsu. The v4i16 path uses a reciprocal estimate with two Newton steps.
// NOTE(review): extraction dropped line 9722 -- presumably the signature
// "static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,". Confirm upstream.
9723  const ARMSubtarget *ST) {
9724  // TODO: Should this propagate fast-math-flags?
9725  EVT VT = Op.getValueType();
9726  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9727  "unexpected type for custom-lowering ISD::UDIV");
9728 
9729  SDLoc dl(Op);
9730  SDValue N0 = Op.getOperand(0);
9731  SDValue N1 = Op.getOperand(1);
9732  SDValue N2, N3;
9733 
9734  if (VT == MVT::v8i8) {
// Zero-extend to v8i16 and split into high/low v4i16 halves, mirroring the
// structure of LowerSDIV above.
9735  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9736  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9737 
9738  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9739  DAG.getIntPtrConstant(4, dl));
9740  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9741  DAG.getIntPtrConstant(4, dl));
9742  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9743  DAG.getIntPtrConstant(0, dl));
9744  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9745  DAG.getIntPtrConstant(0, dl));
9746 
9747  N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9748  N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9749 
9750  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9751  N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9752 
// NOTE(review): extraction dropped line 9753 -- presumably
// "N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8," for the
// saturating-narrow vqmovnsu call below. Confirm upstream.
9754  DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9755  MVT::i32),
9756  N0);
9757  return N0;
9758  }
9759 
9760  // v4i16 sdiv ... Convert to float.
9761  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9762  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9763  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9764  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
// SINT_TO_FP is safe here: the zero-extended 16-bit values are non-negative
// and well within the signed 32-bit range.
9765  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9766  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9767 
9768  // Use reciprocal estimate and two refinement steps.
9769  // float4 recip = vrecpeq_f32(yf);
9770  // recip *= vrecpsq_f32(yf, recip);
9771  // recip *= vrecpsq_f32(yf, recip);
// NOTE(review): extraction dropped line 9772 -- presumably
// "N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,". Confirm.
9773  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9774  BN1);
// NOTE(review): extraction dropped line 9775 -- presumably
// "N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32," (first vrecps).
9776  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9777  BN1, N2);
9778  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
// NOTE(review): extraction dropped line 9779 -- presumably the second vrecps
// "N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,". Confirm.
9780  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9781  BN1, N2);
9782  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9783  // Simply multiplying by the reciprocal estimate can leave us a few ulps
9784  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9785  // and that it will never cause us to return an answer too large).
9786  // float4 result = as_float4(as_int4(xf*recip) + 2);
9787  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9788  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9789  N1 = DAG.getConstant(2, dl, MVT::v4i32);
9790  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9791  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9792  // Convert back to integer and return.
9793  // return vmovn_u32(vcvt_s32_f32(result));
9794  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9795  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9796  return N0;
9797 }
9798 
// LowerADDSUBCARRY: lowers ISD::ADDCARRY / ISD::SUBCARRY to ARMISD::ADDE /
// ARMISD::SUBE, converting between the generic boolean carry/borrow
// representation and the ARM carry flag on the way in and out.
// NOTE(review): extraction dropped line 9799 -- presumably the signature
// "static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {".
9800  SDNode *N = Op.getNode();
9801  EVT VT = N->getValueType(0);
// Each node produces the arithmetic result plus an i32 carry value.
9802  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9803 
9804  SDValue Carry = Op.getOperand(2);
9805 
9806  SDLoc DL(Op);
9807 
9808  SDValue Result;
9809  if (Op.getOpcode() == ISD::ADDCARRY) {
9810  // This converts the boolean value carry into the carry flag.
9811  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9812 
9813  // Do the addition proper using the carry flag we wanted.
9814  Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9815  Op.getOperand(1), Carry);
9816 
9817  // Now convert the carry flag into a boolean value.
9818  Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9819  } else {
9820  // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
9821  // have to invert the carry first.
9822  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9823  DAG.getConstant(1, DL, MVT::i32), Carry);
9824  // This converts the boolean value carry into the carry flag.
9825  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9826 
9827  // Do the subtraction proper using the carry flag we wanted.
9828  Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9829  Op.getOperand(1), Carry);
9830 
9831  // Now convert the carry flag into a boolean value.
9832  Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9833  // But the carry returned by ARMISD::SUBE is not a borrow as expected
9834  // by ISD::SUBCARRY, so compute 1 - C.
9835  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9836  DAG.getConstant(1, DL, MVT::i32), Carry);
9837  }
9838 
9839  // Return both values.
9840  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9841 }
9842 
// LowerFSINCOS: Darwin-only lowering of FSINCOS to the __sincos_stret libcall.
// Under the APCS ABI the pair result is returned via an sret stack slot and
// loaded back; otherwise the call's direct result is used.
9843 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9844  assert(Subtarget->isTargetDarwin());
9845 
9846  // For iOS, we want to call an alternative entry point: __sincos_stret,
9847  // return values are passed via sret.
9848  SDLoc dl(Op);
9849  SDValue Arg = Op.getOperand(0);
9850  EVT ArgVT = Arg.getValueType();
9851  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9852  auto PtrVT = getPointerTy(DAG.getDataLayout());
9853 
// NOTE(review): extraction dropped line 9854 -- presumably
// "MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();"
// (MFI is used below to create the sret stack object). Confirm upstream.
9855  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9856 
9857  // Pair of floats / doubles used to pass the result.
9858  Type *RetTy = StructType::get(ArgTy, ArgTy);
9859  auto &DL = DAG.getDataLayout();
9860 
9861  ArgListTy Args;
9862  bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9863  SDValue SRet;
9864  if (ShouldUseSRet) {
9865  // Create stack object for sret.
9866  const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9867  const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9868  int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9869  SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9870 
// The sret pointer is passed as a hidden first argument; the call itself
// then returns void.
9871  ArgListEntry Entry;
9872  Entry.Node = SRet;
9873  Entry.Ty = RetTy->getPointerTo();
9874  Entry.IsSExt = false;
9875  Entry.IsZExt = false;
9876  Entry.IsSRet = true;
9877  Args.push_back(Entry);
9878  RetTy = Type::getVoidTy(*DAG.getContext());
9879  }
9880 
9881  ArgListEntry Entry;
9882  Entry.Node = Arg;
9883  Entry.Ty = ArgTy;
9884  Entry.IsSExt = false;
9885  Entry.IsZExt = false;
9886  Args.push_back(Entry);
9887 
9888  RTLIB::Libcall LC =
9889  (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9890  const char *LibcallName = getLibcallName(LC);
// NOTE(review): extraction dropped line 9891 -- presumably
// "CallingConv::ID CC = getLibcallCallingConv(LC);" (CC is used below).
9892  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9893 
// NOTE(review): extraction dropped line 9894 -- presumably
// "ARMTargetLowering::CallLoweringInfo CLI(DAG);". Confirm upstream.
9895  CLI.setDebugLoc(dl)
9896  .setChain(DAG.getEntryNode())
9897  .setCallee(CC, RetTy, Callee, std::move(Args))
9898  .setDiscardResult(ShouldUseSRet);
9899  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9900 
9901  if (!ShouldUseSRet)
9902  return CallResult.first;
9903 
// Load sin from offset 0 of the sret slot, then cos from the next field.
9904  SDValue LoadSin =
9905  DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9906 
9907  // Address of cos field.
9908  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9909  DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9910  SDValue LoadCos =
9911  DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9912 
9913  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9914  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
9915  LoadSin.getValue(0), LoadCos.getValue(0));
9916 }
9917 
// LowerWindowsDIVLibCall: emits a call to the Windows runtime division helper
// (__rt_sdiv/__rt_udiv or their 64-bit variants), chaining through the caller-
// supplied Chain (typically a WIN__DBZCHK divide-by-zero check).
// Note: arguments are pushed in {1, 0} order -- the __rt_* helpers take the
// divisor before the dividend.
9918 SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9919  bool Signed,
9920  SDValue &Chain) const {
9921  EVT VT = Op.getValueType();
9922  assert((VT == MVT::i32 || VT == MVT::i64) &&
9923  "unexpected type for custom lowering DIV");
9924  SDLoc dl(Op);
9925 
9926  const auto &DL = DAG.getDataLayout();
9927  const auto &TLI = DAG.getTargetLoweringInfo();
9928 
9929  const char *Name = nullptr;
9930  if (Signed)
9931  Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
9932  else
9933  Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
9934 
9935  SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
9936 
// NOTE(review): extraction dropped line 9937 -- presumably
// "ARMTargetLowering::ArgListTy Args;" (Args is populated below). Confirm.
9938 
9939  for (auto AI : {1, 0}) {
9940  ArgListEntry Arg;
9941  Arg.Node = Op.getOperand(AI);
9942  Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
9943  Args.push_back(Arg);
9944  }
9945 
9946  CallLoweringInfo CLI(DAG);
9947  CLI.setDebugLoc(dl)
9948  .setChain(Chain)
// NOTE(review): extraction dropped line 9949 -- presumably
// ".setCallee(CallingConv::ARM_AAPCS, VT.getTypeForEVT(*DAG.getContext()),"
// or similar; confirm the calling convention against upstream.
9950  ES, std::move(Args));
9951 
9952  return LowerCallTo(CLI).first;
9953 }
9954 
9955 // This is a code size optimisation: return the original SDIV node to
9956 // DAGCombiner when we don't want to expand SDIV into a sequence of
9957 // instructions, and an empty node otherwise which will cause the
9958 // SDIV to be expanded in DAGCombine.
9959 SDValue
9960 ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
9961  SelectionDAG &DAG,
9962  SmallVectorImpl<SDNode *> &Created) const {
9963  // TODO: Support SREM
9964  if (N->getOpcode() != ISD::SDIV)
9965  return SDValue();
9966 
9967  const auto &ST = DAG.getSubtarget<ARMSubtarget>();
9968  const bool MinSize = ST.hasMinSize();
9969  const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
9970  : ST.hasDivideInARMMode();
9971 
9972  // Don't touch vector types; rewriting this may lead to scalarizing
9973  // the int divs.
9974  if (N->getOperand(0).getValueType().isVector())
9975  return SDValue();
9976 
9977  // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
9978  // hwdiv support for this to be really profitable.
9979  if (!(MinSize && HasDivide))
9980  return SDValue();
9981 
9982  // ARM mode is a bit simpler than Thumb: we can handle large power
9983  // of 2 immediates with 1 mov instruction; no further checks required,
9984  // just return the sdiv node.
9985  if (!ST.isThumb())
9986  return SDValue(N, 0);
9987 
9988  // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
9989  // and thus lose the code size benefits of a MOVS that requires only 2.
9990  // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
9991  // but as it's doing exactly this, it's not worth the trouble to get TTI.
9992  if (Divisor.sgt(128))
9993  return SDValue();
9994 
9995  return SDValue(N, 0);
9996 }
9997 
9998 SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
9999  bool Signed) const {
10000  assert(Op.getValueType() == MVT::i32 &&
10001  "unexpected type for custom lowering DIV");
10002  SDLoc dl(Op);
10003 
10004  SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10005  DAG.getEntryNode(), Op.getOperand(1));
10006 
10007  return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10008 }
10009 
// WinDBZCheckDenominator: builds a WIN__DBZCHK chain node over the divisor of
// N. For i64, the two 32-bit halves are OR'd together so the check fires iff
// the whole 64-bit divisor is zero.
// NOTE(review): extraction dropped line 10010 -- presumably the signature
// "static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N,
//  SDValue InChain) {". Confirm against upstream.
10011  SDLoc DL(N);
10012  SDValue Op = N->getOperand(1);
10013  if (N->getValueType(0) == MVT::i32)
10014  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
// NOTE(review): extraction dropped lines 10015/10017 -- presumably the
// "SDValue Lo/Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,"
// halves of the i64 divisor. Confirm against upstream.
10016  DAG.getConstant(0, DL, MVT::i32));
10018  DAG.getConstant(1, DL, MVT::i32));
10019  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10020  DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10021 }
10022 
// ExpandDIV_Windows: expands a 64-bit division on Windows into a
// divide-by-zero check plus a __rt_*div64 libcall, then splits the i64 result
// back into a BUILD_PAIR of two i32 halves for the legalizer.
10023 void ARMTargetLowering::ExpandDIV_Windows(
10024  SDValue Op, SelectionDAG &DAG, bool Signed,
// NOTE(review): extraction dropped line 10025 -- presumably the final
// parameter "SmallVectorImpl<SDValue> &Results) const {". Confirm upstream.
10026  const auto &DL = DAG.getDataLayout();
10027  const auto &TLI = DAG.getTargetLoweringInfo();
10028 
10029  assert(Op.getValueType() == MVT::i64 &&
10030  "unexpected type for custom lowering DIV");
10031  SDLoc dl(Op);
10032 
10033  SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10034 
10035  SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10036 
// Split the 64-bit quotient: low half by truncate, high half by shifting
// right 32 then truncating.
10037  SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10038  SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10039  DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10040  Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10041 
10042  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10043 }
10044 
// LowerPredicateLoad: lowers an MVE predicate-vector load (v2i1/v4i1/v8i1/
// v16i1) as an integer extload of the predicate bits, followed by a
// PREDICATE_CAST (and a subvector extract for the narrower types).
// NOTE(review): extraction dropped line 10045 -- presumably the signature
// "static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {".
10046  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10047  EVT MemVT = LD->getMemoryVT();
10048  assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10049  MemVT == MVT::v16i1) &&
10050  "Expected a predicate type!");
10051  assert(MemVT == Op.getValueType());
10052  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10053  "Expected a non-extending load");
10054  assert(LD->isUnindexed() && "Expected a unindexed load");
10055 
10056  // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit
10057  // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10058  // need to make sure that 8/4/2 bits are actually loaded into the correct
10059  // place, which means loading the value and then shuffling the values into
10060  // the bottom bits of the predicate.
10061  // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect
10062  // for BE).
10063  // Speaking of BE, apparently the rest of llvm will assume a reverse order to
10064  // a natural VMSR(load), so needs to be reversed.
10065 
10066  SDLoc dl(Op);
// Load the raw predicate bits as an integer of exactly MemVT's bit width,
// any-extended into an i32.
10067  SDValue Load = DAG.getExtLoad(
10068  ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10069  EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10070  LD->getMemOperand());
10071  SDValue Val = Load;
// Big-endian: bit-reverse then shift so the loaded bits land in the low end.
10072  if (DAG.getDataLayout().isBigEndian())
10073  Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10074  DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10075  DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10076  SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10077  if (MemVT != MVT::v16i1)
10078  Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10079  DAG.getConstant(0, dl, MVT::i32));
10080  return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10081 }
10082 
// LowerLOAD: custom-lowers a volatile i64 load on v5TE+ non-Thumb1 targets to
// an ARMISD::LDRD node (single atomic-width load of both halves), pairing the
// two i32 results back into an i64. Other loads are left untouched (Results
// stays empty).
10083 void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10084  SelectionDAG &DAG) const {
10085  LoadSDNode *LD = cast<LoadSDNode>(N);
10086  EVT MemVT = LD->getMemoryVT();
10087  assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10088 
10089  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10090  !Subtarget->isThumb1Only() && LD->isVolatile()) {
10091  SDLoc dl(N);
// NOTE(review): extraction dropped line 10092 -- presumably
// "SDValue Result = DAG.getMemIntrinsicNode(" opening the LDRD node built
// from the operands below. Confirm upstream.
10093  ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10094  {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
// Endianness decides which LDRD result is the low half of the i64.
10095  SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10096  SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10097  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10098  Results.append({Pair, Result.getValue(2)});
10099  }
10100 }
10101 
// LowerPredicateStore: lowers an MVE predicate-vector store (v2i1/v4i1/v8i1/
// v16i1) by widening narrow predicates to v16i1 with undef top lanes,
// PREDICATE_CASTing to i32, and emitting a truncating scalar store of just
// MemVT's bits.
// NOTE(review): extraction dropped line 10102 -- presumably the signature
// "static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {".
10103  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10104  EVT MemVT = ST->getMemoryVT();
10105  assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10106  MemVT == MVT::v16i1) &&
10107  "Expected a predicate type!");
10108  assert(MemVT == ST->getValue().getValueType());
10109  assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10110  assert(ST->isUnindexed() && "Expected a unindexed store");
10111 
10112  // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10113  // top bits unset and a scalar store.
10114  SDLoc dl(Op);
10115  SDValue Build = ST->getValue();
10116  if (MemVT != MVT::v16i1) {
// NOTE(review): extraction dropped line 10117 -- presumably
// "SmallVector<SDValue, 16> Ops;" (populated below). Confirm upstream.
// On big-endian, lanes are gathered in reverse element order.
10118  for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10119  unsigned Elt = DAG.getDataLayout().isBigEndian()
10120  ? MemVT.getVectorNumElements() - I - 1
10121  : I;
10122  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10123  DAG.getConstant(Elt, dl, MVT::i32)));
10124  }
10125  for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10126  Ops.push_back(DAG.getUNDEF(MVT::i32));
10127  Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10128  }
10129  SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
// Full 16-lane predicates on big-endian need the bit order reversed to match
// the in-memory layout (see the LowerPredicateLoad comments).
10130  if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10131  GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10132  DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10133  DAG.getConstant(16, dl, MVT::i32));
10134  return DAG.getTruncStore(
10135  ST->getChain(), dl, GRP, ST->getBasePtr(),
10136  EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10137  ST->getMemOperand());
10138 }
10139 
// LowerSTORE: custom store lowering. Volatile i64 stores on v5TE+ non-Thumb1
// become ARMISD::STRD; MVE predicate-vector stores are routed to
// LowerPredicateStore; everything else returns SDValue() (no custom lowering).
// NOTE(review): extraction dropped line 10140 -- presumably
// "static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,". Confirm.
10141  const ARMSubtarget *Subtarget) {
10142  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10143  EVT MemVT = ST->getMemoryVT();
10144  assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10145 
10146  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10147  !Subtarget->isThumb1Only() && ST->isVolatile()) {
10148  SDNode *N = Op.getNode();
10149  SDLoc dl(N);
10150 
// Split the i64 value into endian-ordered halves for the STRD operands.
10151  SDValue Lo = DAG.getNode(
10152  ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10153  DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10154  MVT::i32));
10155  SDValue Hi = DAG.getNode(
10156  ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10157  DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10158  MVT::i32));
10159 
// NOTE(review): extraction dropped line 10160 -- presumably
// "return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),"
// matching the operand list below. Confirm upstream.
10161  {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10162  MemVT, ST->getMemOperand());
10163  } else if (Subtarget->hasMVEIntegerOps() &&
10164  ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10165  MemVT == MVT::v16i1))) {
10166  return LowerPredicateStore(Op, DAG);
10167  }
10168 
10169  return SDValue();
10170 }
10171 
10172 static bool isZeroVector(SDValue N) {
10173  return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10174  (N->getOpcode() == ARMISD::VMOVIMM &&
10175  isNullConstant(N->getOperand(0))));
10176 }
10177 
// LowerMLOAD: lowers a masked load for MVE. MVE masked loads always produce
// zero in inactive lanes, so a zero (or undef/cast-of-zero) passthru is free;
// any other passthru is applied afterwards with a VSELECT.
// NOTE(review): extraction dropped line 10178 -- presumably the signature
// "static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {". Confirm.
10179  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10180  MVT VT = Op.getSimpleValueType();
10181  SDValue Mask = N->getMask();
10182  SDValue PassThru = N->getPassThru();
10183  SDLoc dl(Op);
10184 
// Zero passthru matches MVE semantics exactly -- nothing to do.
10185  if (isZeroVector(PassThru))
10186  return Op;
10187 
10188  // MVE Masked loads use zero as the passthru value. Here we convert undef to
10189  // zero too, and other values are lowered to a select.
10190  SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10191  DAG.getTargetConstant(0, dl, MVT::i32));
10192  SDValue NewLoad = DAG.getMaskedLoad(
10193  VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10194  N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10195  N->getExtensionType(), N->isExpandingLoad());
10196  SDValue Combo = NewLoad;
// A zero vector hidden behind a bitcast/VECTOR_REG_CAST also needs no select.
10197  bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10198  PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10199  isZeroVector(PassThru->getOperand(0));
10200  if (!PassThru.isUndef() && !PassThruIsCastZero)
10201  Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10202  return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10203 }
10204 
// LowerVecReduce: lowers VECREDUCE_* ops for MVE by repeatedly combining the
// vector with a lane-reversed copy of itself (halving the active lanes each
// step), then folding the final 4 (or 2) lanes with scalar ops.
// NOTE(review): extraction dropped line 10205 -- presumably the signature
// "static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,". Confirm.
10206  const ARMSubtarget *ST) {
10207  if (!ST->hasMVEIntegerOps())
10208  return SDValue();
10209 
10210  SDLoc dl(Op);
// Map each reduction opcode to the scalar/vector binary op it reduces with.
10211  unsigned BaseOpcode = 0;
10212  switch (Op->getOpcode()) {
10213  default: llvm_unreachable("Expected VECREDUCE opcode");
10214  case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10215  case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10216  case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10217  case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10218  case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10219  case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10220  case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10221  case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10222  }
10223 
10224  SDValue Op0 = Op->getOperand(0);
10225  EVT VT = Op0.getValueType();
10226  EVT EltVT = VT.getVectorElementType();
10227  unsigned NumElts = VT.getVectorNumElements();
10228  unsigned NumActiveLanes = NumElts;
10229 
10230  assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10231  NumActiveLanes == 2) &&
10232  "Only expected a power 2 vector size");
10233 
10234  // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10235  // allows us to easily extract vector elements from the lanes.
10236  while (NumActiveLanes > 4) {
// VREV16 swaps within 16-bit pairs, VREV32 within 32-bit pairs -- picked so
// each step pairs lane i with its partner in the other half of the group.
10237  unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10238  SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10239  Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10240  NumActiveLanes /= 2;
10241  }
10242 
10243  SDValue Res;
10244  if (NumActiveLanes == 4) {
10245  // The remaining 4 elements are summed sequentially
10246  SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10247  DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10248  SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10249  DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10250  SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10251  DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10252  SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10253  DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
// Propagate the node's fast-math/nuw-style flags onto the scalar ops.
10254  SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10255  SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10256  Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10257  } else {
10258  SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10259  DAG.getConstant(0, dl, MVT::i32));
10260  SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10261  DAG.getConstant(1, dl, MVT::i32));
10262  Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10263  }
10264 
10265  // Result type may be wider than element type.
10266  if (EltVT != Op->getValueType(0))
10267  Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10268  return Res;
10269 }
10270 
// LowerVecReduceF: floating-point variant gate -- requires MVE float support
// before delegating to the generic LowerVecReduce above.
// NOTE(review): extraction dropped line 10271 -- presumably the signature
// "static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,". Confirm.
10272  const ARMSubtarget *ST) {
10273  if (!ST->hasMVEFloatOps())
10274  return SDValue();
10275  return LowerVecReduce(Op, DAG, ST);
10276 }
10277 
// LowerAtomicLoadStore: atomic load/store legality check. Monotonic (and
// weaker) orderings are legal as-is; anything acquire/release or stronger is
// rejected here (returns empty) for targets without a dmb or equivalent.
// NOTE(review): extraction dropped line 10278 -- presumably the signature
// "static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {".
10279  if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10280  // Acquire/Release load/store is not legal for targets without a dmb or
10281  // equivalent available.
10282  return SDValue();
10283 
10284  // Monotonic load/store is legal for all targets.
10285  return Op;
10286 }
10287 
// ReplaceREADCYCLECOUNTER: expands READCYCLECOUNTER to the PMU cycle-count
// MRC read (p15, c9, c13, 0), zero-extending the 32-bit count into the i64
// the generic node expects.
// NOTE(review): extraction dropped lines 10288-10289 -- presumably
// "static void ReplaceREADCYCLECOUNTER(SDNode *N,
//  SmallVectorImpl<SDValue> &Results,". Confirm against upstream.
10290  SelectionDAG &DAG,
10291  const ARMSubtarget *Subtarget) {
10292  SDLoc DL(N);
10293  // Under Power Management extensions, the cycle-count is:
10294  // mrc p15, #0, <Rt>, c9, c13, #0
10295  SDValue Ops[] = { N->getOperand(0), // Chain
10296  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10297  DAG.getTargetConstant(15, DL, MVT::i32),
10298  DAG.getTargetConstant(0, DL, MVT::i32),
10299  DAG.getTargetConstant(9, DL, MVT::i32),
10300  DAG.getTargetConstant(13, DL, MVT::i32),
10301  DAG.getTargetConstant(0, DL, MVT::i32)
10302  };
10303 
10304  SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10305  DAG.getVTList(MVT::i32, MVT::Other), Ops);
// Pair the 32-bit count with a zero high half to form the i64 result.
10306  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10307  DAG.getConstant(0, DL, MVT::i32)));
10308  Results.push_back(Cycles32.getValue(1));
10309 }
10310 
// createGPRPairNode: packs an i64 value into an untyped GPRPair register
// sequence (gsub_0 = low half, gsub_1 = high half, swapped on big-endian),
// as needed by pseudo-instructions like CMP_SWAP_64.
// NOTE(review): extraction dropped line 10311 -- presumably the signature
// "static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {".
10312  SDLoc dl(V.getNode());
10313  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
10314  SDValue VHi = DAG.getAnyExtOrTrunc(
10315  DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
10316  dl, MVT::i32);
10317  bool isBigEndian = DAG.getDataLayout().isBigEndian();
10318  if (isBigEndian)
10319  std::swap (VLo, VHi);
10320  SDValue RegClass =
10321  DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10322  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10323  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10324  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10325  return SDValue(
10326  DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10327 }
10328 
// ReplaceCMP_SWAP_64Results: expands a 64-bit atomic cmpxchg into the
// CMP_SWAP_64 pseudo, packing the expected/new values into GPRPairs and
// unpacking the untyped result back into an i64 BUILD_PAIR plus chain.
// NOTE(review): extraction dropped lines 10329-10330 -- presumably
// "static void ReplaceCMP_SWAP_64Results(SDNode *N,
//  SmallVectorImpl<SDValue> &Results,". Confirm against upstream.
10331  SelectionDAG &DAG) {
10332  assert(N->getValueType(0) == MVT::i64 &&
10333  "AtomicCmpSwap on types less than 64 should be legal");
10334  SDValue Ops[] = {N->getOperand(1),
10335  createGPRPairNode(DAG, N->getOperand(2)),
10336  createGPRPairNode(DAG, N->getOperand(3)),
10337  N->getOperand(0)};
10338  SDNode *CmpSwap = DAG.getMachineNode(
10339  ARM::CMP_SWAP_64, SDLoc(N),
// NOTE(review): extraction dropped line 10340 -- presumably the VTList/Ops
// arguments, e.g. "DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);".
10341 
10342  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10343  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10344 
10345  bool isBigEndian = DAG.getDataLayout().isBigEndian();
10346 
// Extract the two halves of the pair result, endian-aware, and rebuild i64.
10347  SDValue Lo =
10348  DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10349  SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10350  SDValue Hi =
10351  DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10352  SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10353  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10354  Results.push_back(SDValue(CmpSwap, 2));
10355 }
10356 
// LowerFSETCC: lowers STRICT_FSETCC / STRICT_FSETCCS. Unsupported float types
// are softened to a libcall + integer SETCC; supported ones use a VFP compare
// and one or two CMOVs (two when the ARM condition code pair requires it).
10357 SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10358  SDLoc dl(Op);
10359  EVT VT = Op.getValueType();
10360  SDValue Chain = Op.getOperand(0);
10361  SDValue LHS = Op.getOperand(1);
10362  SDValue RHS = Op.getOperand(2);
10363  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10364  bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10365 
10366  // If we don't have instructions of this float type then soften to a libcall
10367  // and use SETCC instead.
10368  if (isUnsupportedFloatingType(LHS.getValueType())) {
// NOTE(review): extraction dropped line 10369 -- presumably
// "DAG.getTargetLoweringInfo().softenSetCCOperands(" opening the call whose
// arguments follow. Confirm against upstream.
10370  DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
// Softening may fold the compare to a single boolean in LHS with no RHS;
// then compare that boolean against zero.
10371  if (!RHS.getNode()) {
10372  RHS = DAG.getConstant(0, dl, LHS.getValueType());
10373  CC = ISD::SETNE;
10374  }
10375  SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10376  DAG.getCondCode(CC));
10377  return DAG.getMergeValues({Result, Chain}, dl);
10378  }
10379 
10380  ARMCC::CondCodes CondCode, CondCode2;
10381  FPCCToARMCC(CC, CondCode, CondCode2);
10382 
10383  // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10384  // in CMPFP and CMPFPE, but instead it should be made explicit by these
10385  // instructions using a chain instead of glue. This would also fix the problem
10386  // here (and also in LowerSELECT_CC) where we generate two comparisons when
10387  // CondCode2 != AL.
10388  SDValue True = DAG.getConstant(1, dl, VT);
10389  SDValue False = DAG.getConstant(0, dl, VT);
10390  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10391  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10392  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10393  SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
// Some FP conditions need two ARM condition codes; OR in the second via a
// chained CMOV over the first result.
10394  if (CondCode2 != ARMCC::AL) {
10395  ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10396  Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10397  Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10398  }
10399  return DAG.getMergeValues({Result, Chain}, dl);
10400 }
10401 
// LowerSPONENTRY: materialises the stack pointer value at function entry as a
// frame index over a 4-byte fixed object at offset 0.
10402 SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
// NOTE(review): extraction dropped line 10403 -- presumably
// "MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();"
// (MFI is used below). Confirm against upstream.
10404 
10405  EVT VT = getPointerTy(DAG.getDataLayout());
10406  SDLoc DL(Op);
10407  int FI = MFI.CreateFixedObject(4, 0, false);
10408  return DAG.getFrameIndex(FI, VT);
10409 }
10410 
// Central custom-lowering dispatch: maps each ISD opcode that ARM marks
// Custom to its dedicated Lower*/Expand* routine. Returning SDValue() (see
// ARMISD::WIN__DBZCHK) tells legalization to keep the node unchanged.
// NOTE(review): the enclosing signature (original line 10411, presumably
// `SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
// const {`) was dropped by the listing extraction — confirm against upstream.
10412  LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10413  switch (Op.getOpcode()) {
10414  default: llvm_unreachable("Don't know how to custom lower this!");
10415  case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10416  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10417  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10418  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10419  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10420  case ISD::SELECT: return LowerSELECT(Op, DAG);
10421  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10422  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10423  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10424  case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10425  case ISD::VASTART: return LowerVASTART(Op, DAG);
10426  case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10427  case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10428  case ISD::SINT_TO_FP:
10429  case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
// NOTE(review): original lines 10430-10431 (likely the STRICT_FP_TO_SINT/
// STRICT_FP_TO_UINT case labels) are elided from this listing.
10432  case ISD::FP_TO_SINT:
10433  case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10434  case ISD::FP_TO_SINT_SAT:
10435  case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10436  case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10437  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10438  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10439  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10440  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10441  case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10442  case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10443  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10444  Subtarget);
10445  case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10446  case ISD::SHL:
10447  case ISD::SRL:
10448  case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10449  case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10450  case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10451  case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10452  case ISD::SRL_PARTS:
10453  case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10454  case ISD::CTTZ:
10455  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10456  case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10457  case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10458  case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10459  case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10460  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10461  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10462  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10463  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10464  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10465  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10466  case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10467  case ISD::SIGN_EXTEND:
10468  case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10469  case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10470  case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10471  case ISD::MUL: return LowerMUL(Op, DAG);
10472  case ISD::SDIV:
// Windows has no scalar hardware-independent divide libcall convention in
// the default path: scalar divides go through the RTABI __rt_sdiv helpers.
10473  if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10474  return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10475  return LowerSDIV(Op, DAG, Subtarget);
10476  case ISD::UDIV:
10477  if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10478  return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10479  return LowerUDIV(Op, DAG, Subtarget);
10480  case ISD::ADDCARRY:
10481  case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
10482  case ISD::SADDO:
10483  case ISD::SSUBO:
10484  return LowerSignedALUO(Op, DAG);
10485  case ISD::UADDO:
10486  case ISD::USUBO:
10487  return LowerUnsignedALUO(Op, DAG);
10488  case ISD::SADDSAT:
10489  case ISD::SSUBSAT:
10490  case ISD::UADDSAT:
10491  case ISD::USUBSAT:
10492  return LowerADDSUBSAT(Op, DAG, Subtarget);
10493  case ISD::LOAD:
10494  return LowerPredicateLoad(Op, DAG);
10495  case ISD::STORE:
10496  return LowerSTORE(Op, DAG, Subtarget);
10497  case ISD::MLOAD:
10498  return LowerMLOAD(Op, DAG);
10499  case ISD::VECREDUCE_MUL:
10500  case ISD::VECREDUCE_AND:
10501  case ISD::VECREDUCE_OR:
10502  case ISD::VECREDUCE_XOR:
10503  return LowerVecReduce(Op, DAG, Subtarget);
10504  case ISD::VECREDUCE_FADD:
10505  case ISD::VECREDUCE_FMUL:
10506  case ISD::VECREDUCE_FMIN:
10507  case ISD::VECREDUCE_FMAX:
10508  return LowerVecReduceF(Op, DAG, Subtarget);
10509  case ISD::ATOMIC_LOAD:
10510  case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10511  case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10512  case ISD::SDIVREM:
10513  case ISD::UDIVREM: return LowerDivRem(Op, DAG);
// NOTE(review): the `case ISD::DYNAMIC_STACKALLOC:` label (original line
// 10514) is elided from this listing; the guarded code below belongs to it.
10515  if (Subtarget->isTargetWindows())
10516  return LowerDYNAMIC_STACKALLOC(Op, DAG);
10517  llvm_unreachable("Don't know how to custom lower this!");
10518  case ISD::STRICT_FP_ROUND:
10519  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10520  case ISD::STRICT_FP_EXTEND:
10521  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10522  case ISD::STRICT_FSETCC:
10523  case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10524  case ISD::SPONENTRY:
10525  return LowerSPONENTRY(Op, DAG);
10526  case ARMISD::WIN__DBZCHK: return SDValue();
10527  }
10528 }
10529 
// Expand the 64-bit-accumulator ARM DSP intrinsics (smlald/smlaldx/
// smlsld/smlsldx) into the matching two-result ARMISD long-MAC node, then
// repackage the i32 Lo/Hi results as a single i64 via BUILD_PAIR. Unknown
// intrinsics fall through untouched (early return, no result pushed).
// NOTE(review): the function header (original line 10530, presumably
// `static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue>
// &Results, SelectionDAG &DAG) {`) was dropped by the listing extraction.
10531  SelectionDAG &DAG) {
10532  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
10533  unsigned Opc = 0;
10534  if (IntNo == Intrinsic::arm_smlald)
10535  Opc = ARMISD::SMLALD;
10536  else if (IntNo == Intrinsic::arm_smlaldx)
10537  Opc = ARMISD::SMLALDX;
10538  else if (IntNo == Intrinsic::arm_smlsld)
10539  Opc = ARMISD::SMLSLD;
10540  else if (IntNo == Intrinsic::arm_smlsldx)
10541  Opc = ARMISD::SMLSLDX;
10542  else
10543  return;
10544 
10545  SDLoc dl(N);
// Split the i64 accumulator (operand 3) into its low/high i32 halves.
// NOTE(review): the `SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, ...` and
// `SDValue Hi = ...` lines (original 10546/10549) are elided from this
// listing; only their continuation lines remain below.
10547  N->getOperand(3),
10548  DAG.getConstant(0, dl, MVT::i32));
10550  N->getOperand(3),
10551  DAG.getConstant(1, dl, MVT::i32));
10552 
// The long-MAC node produces two i32 results (low, high halves of the
// 64-bit accumulation).
10553  SDValue LongMul = DAG.getNode(Opc, dl,
10554  DAG.getVTList(MVT::i32, MVT::i32),
10555  N->getOperand(1), N->getOperand(2),
10556  Lo, Hi);
10557  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10558  LongMul.getValue(0), LongMul.getValue(1)));
10559 }
10560 
10561 /// ReplaceNodeResults - Replace the results of node with an illegal result
10562 /// type with new values built out of custom code.
// Cases either set Res (pushed once at the bottom if non-null) or push into
// Results themselves and return directly.
// NOTE(review): the signature lines (original 10563-10564, presumably
// `void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
// SmallVectorImpl<SDValue> &Results,`) were dropped by the extraction.
10565  SelectionDAG &DAG) const {
10566  SDValue Res;
10567  switch (N->getOpcode()) {
10568  default:
10569  llvm_unreachable("Don't know how to custom expand this!");
10570  case ISD::READ_REGISTER:
10571  ExpandREAD_REGISTER(N, Results, DAG);
10572  break;
10573  case ISD::BITCAST:
10574  Res = ExpandBITCAST(N, DAG, Subtarget);
10575  break;
10576  case ISD::SRL:
10577  case ISD::SRA:
10578  case ISD::SHL:
10579  Res = Expand64BitShift(N, DAG, Subtarget);
10580  break;
10581  case ISD::SREM:
10582  case ISD::UREM:
10583  Res = LowerREM(N, DAG);
10584  break;
10585  case ISD::SDIVREM:
10586  case ISD::UDIVREM:
// DivRem yields both quotient and remainder; push both and return so the
// shared single-result push at the bottom is skipped.
10587  Res = LowerDivRem(SDValue(N, 0), DAG);
10588  assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10589  Results.push_back(Res.getValue(0));
10590  Results.push_back(Res.getValue(1));
10591  return;
10592  case ISD::SADDSAT:
10593  case ISD::SSUBSAT:
10594  case ISD::UADDSAT:
10595  case ISD::USUBSAT:
10596  Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10597  break;
10598  case ISD::READCYCLECOUNTER:
10599  ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10600  return;
10601  case ISD::UDIV:
10602  case ISD::SDIV:
10603  assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10604  return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10605  Results);
10606  case ISD::ATOMIC_CMP_SWAP:
// NOTE(review): the call expanding the 64-bit cmpxchg (original line 10607,
// presumably `ReplaceCMP_SWAP_64Results(N, Results, DAG);`) is elided here.
10608  return;
// NOTE(review): the `case ISD::INTRINSIC_WO_CHAIN:` label (original line
// 10609) is elided from this listing.
10610  return ReplaceLongIntrinsic(N, Results, DAG);
10611  case ISD::LOAD:
10612  LowerLOAD(N, Results, DAG);
10613  break;
10614  case ISD::TRUNCATE:
10615  Res = LowerTruncate(N, DAG, Subtarget);
10616  break;
10617  case ISD::SIGN_EXTEND:
10618  case ISD::ZERO_EXTEND:
10619  Res = LowerVectorExtend(N, DAG, Subtarget);
10620  break;
10621  case ISD::FP_TO_SINT_SAT:
10622  case ISD::FP_TO_UINT_SAT:
10623  Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10624  break;
10625  }
10626  if (Res.getNode())
10627  Results.push_back(Res);
10628 }
10629 
10630 //===----------------------------------------------------------------------===//
10631 // ARM Scheduler Hooks
10632 //===----------------------------------------------------------------------===//
10633 
10634 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10635 /// registers the function context.
// Stores the PC-relative address of DispatchBB into slot jbuf[1] (offset 36
// from frame index FI) of the SjLj function context, with three codegen
// variants: Thumb2, Thumb1, and ARM mode. In Thumb modes the low bit of the
// address is set to keep the target in Thumb state.
10636 void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// NOTE(review): the `MachineBasicBlock *MBB,` parameter line (original
// 10637) is elided from this listing; MBB is used throughout the body.
10638  MachineBasicBlock *DispatchBB,
10639  int FI) const {
10640  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10641  "ROPI/RWPI not currently supported with SjLj");
10642  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10643  DebugLoc dl = MI.getDebugLoc();
10644  MachineFunction *MF = MBB->getParent();
// NOTE(review): `MachineRegisterInfo *MRI = &MF->getRegInfo();` (original
// line 10645) is elided; MRI is referenced below.
10646  MachineConstantPool *MCP = MF->getConstantPool();
10647  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10648  const Function &F = MF->getFunction();
10649 
10650  bool isThumb = Subtarget->isThumb();
10651  bool isThumb2 = Subtarget->isThumb2();
10652 
// PC-relative addressing needs a PIC label and the mode-dependent PC bias
// (4 in Thumb, 8 in ARM) folded into the constant-pool entry.
10653  unsigned PCLabelId = AFI->createPICLabelUId();
10654  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10655  ARMConstantPoolValue *CPV =
10656  ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10657  unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10658 
10659  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10660  : &ARM::GPRRegClass;
10661 
10662  // Grab constant pool and fixed stack memory operands.
10663  MachineMemOperand *CPMMO =
// NOTE(review): the argument lines of the two getMachineMemOperand calls
// (original 10664-10665 and 10668-10669) are elided from this listing.
10666 
10667  MachineMemOperand *FIMMOSt =
10670 
10671  // Load the address of the dispatch MBB into the jump buffer.
10672  if (isThumb2) {
10673  // Incoming value: jbuf
10674  // ldr.n r5, LCPI1_1
10675  // orr r5, r5, #1
10676  // add r5, pc
10677  // str r5, [$jbuf, #+4] ; &jbuf[1]
10678  Register NewVReg1 = MRI->createVirtualRegister(TRC);
10679  BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10680  .addConstantPoolIndex(CPI)
10681  .addMemOperand(CPMMO)
10682  .add(predOps(ARMCC::AL));
10683  // Set the low bit because of thumb mode.
10684  Register NewVReg2 = MRI->createVirtualRegister(TRC);
10685  BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10686  .addReg(NewVReg1, RegState::Kill)
10687  .addImm(0x01)
10688  .add(predOps(ARMCC::AL))
10689  .add(condCodeOp());
10690  Register NewVReg3 = MRI->createVirtualRegister(TRC);
10691  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10692  .addReg(NewVReg2, RegState::Kill)
10693  .addImm(PCLabelId);
10694  BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10695  .addReg(NewVReg3, RegState::Kill)
10696  .addFrameIndex(FI)
10697  .addImm(36) // &jbuf[1] :: pc
10698  .addMemOperand(FIMMOSt)
10699  .add(predOps(ARMCC::AL));
10700  } else if (isThumb) {
10701  // Incoming value: jbuf
10702  // ldr.n r1, LCPI1_4
10703  // add r1, pc
10704  // mov r2, #1
10705  // orrs r1, r2
10706  // add r2, $jbuf, #+4 ; &jbuf[1]
10707  // str r1, [r2]
10708  Register NewVReg1 = MRI->createVirtualRegister(TRC);
10709  BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10710  .addConstantPoolIndex(CPI)
10711  .addMemOperand(CPMMO)
10712  .add(predOps(ARMCC::AL));
10713  Register NewVReg2 = MRI->createVirtualRegister(TRC);
10714  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10715  .addReg(NewVReg1, RegState::Kill)
10716  .addImm(PCLabelId);
10717  // Set the low bit because of thumb mode.
// Thumb1 tMOVi8/tORR update CPSR, hence the explicit CPSR defs below.
10718  Register NewVReg3 = MRI->createVirtualRegister(TRC);
10719  BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10720  .addReg(ARM::CPSR, RegState::Define)
10721  .addImm(1)
10722  .add(predOps(ARMCC::AL));
10723  Register NewVReg4 = MRI->createVirtualRegister(TRC);
10724  BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10725  .addReg(ARM::CPSR, RegState::Define)
10726  .addReg(NewVReg2, RegState::Kill)
10727  .addReg(NewVReg3, RegState::Kill)
10728  .add(predOps(ARMCC::AL));
10729  Register NewVReg5 = MRI->createVirtualRegister(TRC);
10730  BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10731  .addFrameIndex(FI)
10732  .addImm(36); // &jbuf[1] :: pc
10733  BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10734  .addReg(NewVReg4, RegState::Kill)
10735  .addReg(NewVReg5, RegState::Kill)
10736  .addImm(0)
10737  .addMemOperand(FIMMOSt)
10738  .add(predOps(ARMCC::AL));
10739  } else {
10740  // Incoming value: jbuf
10741  // ldr r1, LCPI1_1
10742  // add r1, pc, r1
10743  // str r1, [$jbuf, #+4] ; &jbuf[1]
10744  Register NewVReg1 = MRI->createVirtualRegister(TRC);
10745  BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10746  .addConstantPoolIndex(CPI)
10747  .addImm(0)
10748  .addMemOperand(CPMMO)
10749  .add(predOps(ARMCC::AL));
10750  Register NewVReg2 = MRI->createVirtualRegister(TRC);
10751  BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10752  .addReg(NewVReg1, RegState::Kill)
10753  .addImm(PCLabelId)
10754  .add(predOps(ARMCC::AL));
10755  BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10756  .addReg(NewVReg2, RegState::Kill)
10757  .addFrameIndex(FI)
10758  .addImm(36) // &jbuf[1] :: pc
10759  .addMemOperand(FIMMOSt)
10760  .add(predOps(ARMCC::AL));
10761  }
10762 }
10763 
// Build the SjLj exception dispatch machinery for this function:
//  1. collect every landing pad reachable from a call site and build an
//     inline jump table over them;
//  2. create DispatchBB (loads the call-site index from the function
//     context, range-checks it, traps if out of range) and DispContBB
//     (indexes the jump table and branches to the selected landing pad),
//     with Thumb2 / Thumb1 / ARM instruction selections;
//  3. rewire every invoke block to target DispatchBB instead of its old
//     landing pad, and mark callee-saved registers implicit-def on the
//     invoke calls so they are spilled.
10764 void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10765  MachineBasicBlock *MBB) const {
10766  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10767  DebugLoc dl = MI.getDebugLoc();
10768  MachineFunction *MF = MBB->getParent();
// NOTE(review): `MachineRegisterInfo *MRI = &MF->getRegInfo();` (original
// line 10769) is elided from this listing; MRI is used extensively below.
10770  MachineFrameInfo &MFI = MF->getFrameInfo();
10771  int FI = MFI.getFunctionContextIndex();
10772 
10773  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10774  : &ARM::GPRnopcRegClass;
10775 
10776  // Get a mapping of the call site numbers to all of the landing pads they're
10777  // associated with.
// NOTE(review): the CallSiteNumToLPad map declaration (original line 10778)
// is elided from this listing.
10779  unsigned MaxCSNum = 0;
10780  for (MachineBasicBlock &BB : *MF) {
10781  if (!BB.isEHPad())
10782  continue;
10783 
10784  // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10785  // pad.
10786  for (MachineInstr &II : BB) {
10787  if (!II.isEHLabel())
10788  continue;
10789 
10790  MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10791  if (!MF->hasCallSiteLandingPad(Sym)) continue;
10792 
10793  SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10794  for (unsigned Idx : CallSiteIdxs) {
10795  CallSiteNumToLPad[Idx].push_back(&BB);
10796  MaxCSNum = std::max(MaxCSNum, Idx);
10797  }
10798  break;
10799  }
10800  }
10801 
10802  // Get an ordered list of the machine basic blocks for the jump table.
10803  std::vector<MachineBasicBlock*> LPadList;
// NOTE(review): the InvokeBBs set declaration (original line 10804) is
// elided from this listing.
10805  LPadList.reserve(CallSiteNumToLPad.size());
// Call-site numbering starts at 1; 0 means "not in a try range".
10806  for (unsigned I = 1; I <= MaxCSNum; ++I) {
10807  SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10808  for (MachineBasicBlock *MBB : MBBList) {
10809  LPadList.push_back(MBB);
10810  InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10811  }
10812  }
10813 
10814  assert(!LPadList.empty() &&
10815  "No landing pad destinations for the dispatch jump table!");
10816 
10817  // Create the jump table and associated information.
10818  MachineJumpTableInfo *JTI =
10819  MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10820  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10821 
10822  // Create the MBBs for the dispatch code.
10823 
10824  // Shove the dispatch's address into the return slot in the function context.
10825  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10826  DispatchBB->setIsEHPad();
10827 
// TrapBB is reached when the loaded call-site index is out of range.
10828  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10829  unsigned trap_opcode;
10830  if (Subtarget->isThumb())
10831  trap_opcode = ARM::tTRAP;
10832  else
10833  trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10834 
10835  BuildMI(TrapBB, dl, TII->get(trap_opcode));
10836  DispatchBB->addSuccessor(TrapBB);
10837 
10838  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10839  DispatchBB->addSuccessor(DispContBB);
10840 
10841  // Insert the new MBBs into the function.
10842  MF->insert(MF->end(), DispatchBB);
10843  MF->insert(MF->end(), DispContBB);
10844  MF->insert(MF->end(), TrapBB);
10845 
10846  // Insert code into the entry block that creates and registers the function
10847  // context.
10848  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10849 
10850  MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
// NOTE(review): the mem-operand argument lines (original 10851-10852) are
// elided from this listing.
10853 
10854  MachineInstrBuilder MIB;
10855  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10856 
10857  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10858  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
10859 
10860  // Add a register mask with no preserved registers. This results in all
10861  // registers being marked as clobbered. This can't work if the dispatch block
10862  // is in a Thumb1 function and is linked with ARM code which uses the FP
10863  // registers, as there is no way to preserve the FP registers in Thumb1 mode.
// NOTE(review): the statement adding the register mask to MIB (original
// line 10864) is elided from this listing.
10865 
10866  bool IsPositionIndependent = isPositionIndependent();
10867  unsigned NumLPads = LPadList.size();
10868  if (Subtarget->isThumb2()) {
// Load the call-site index from the function context (FI + 4).
10869  Register NewVReg1 = MRI->createVirtualRegister(TRC);
10870  BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
10871  .addFrameIndex(FI)
10872  .addImm(4)
10873  .addMemOperand(FIMMOLd)
10874  .add(predOps(ARMCC::AL));
10875 
// Range check: small counts fit a cmp-immediate; otherwise materialize the
// count with MOVW/MOVT and compare registers.
10876  if (NumLPads < 256) {
10877  BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
10878  .addReg(NewVReg1)
10879  .addImm(LPadList.size())
10880  .add(predOps(ARMCC::AL));
10881  } else {
10882  Register VReg1 = MRI->createVirtualRegister(TRC);
10883  BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
10884  .addImm(NumLPads & 0xFFFF)
10885  .add(predOps(ARMCC::AL));
10886 
10887  unsigned VReg2 = VReg1;
10888  if ((NumLPads & 0xFFFF0000) != 0) {
10889  VReg2 = MRI->createVirtualRegister(TRC);
10890  BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
10891  .addReg(VReg1)
10892  .addImm(NumLPads >> 16)
10893  .add(predOps(ARMCC::AL));
10894  }
10895 
10896  BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
10897  .addReg(NewVReg1)
10898  .addReg(VReg2)
10899  .add(predOps(ARMCC::AL));
10900  }
10901 
10902  BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
10903  .addMBB(TrapBB)
10904  .addImm(ARMCC::HI)
10905  .addReg(ARM::CPSR);
10906 
10907  Register NewVReg3 = MRI->createVirtualRegister(TRC);
10908  BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
10909  .addJumpTableIndex(MJTI)
10910  .add(predOps(ARMCC::AL));
10911 
10912  Register NewVReg4 = MRI->createVirtualRegister(TRC);
10913  BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
10914  .addReg(NewVReg3, RegState::Kill)
10915  .addReg(NewVReg1)
// NOTE(review): the shifted-operand immediate of the t2ADDrs (original line
// 10916) is elided from this listing.
10917  .add(predOps(ARMCC::AL))
10918  .add(condCodeOp());
10919 
10920  BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
10921  .addReg(NewVReg4, RegState::Kill)
10922  .addReg(NewVReg1)
10923  .addJumpTableIndex(MJTI);
10924  } else if (Subtarget->isThumb()) {
// Thumb1: tLDRspi scales its immediate by 4, hence .addImm(1) for offset 4.
10925  Register NewVReg1 = MRI->createVirtualRegister(TRC);
10926  BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
10927  .addFrameIndex(FI)
10928  .addImm(1)
10929  .addMemOperand(FIMMOLd)
10930  .add(predOps(ARMCC::AL));
10931 
10932  if (NumLPads < 256) {
10933  BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
10934  .addReg(NewVReg1)
10935  .addImm(NumLPads)
10936  .add(predOps(ARMCC::AL));
10937  } else {
// No MOVW in Thumb1: load the landing-pad count from the constant pool.
10938  MachineConstantPool *ConstantPool = MF->getConstantPool();
10939  Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10940  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
10941 
10942  // MachineConstantPool wants an explicit alignment.
10943  Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10944  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10945 
10946  Register VReg1 = MRI->createVirtualRegister(TRC);
10947  BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
10948  .addReg(VReg1, RegState::Define)
10949  .addConstantPoolIndex(Idx)
10950  .add(predOps(ARMCC::AL));
10951  BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
10952  .addReg(NewVReg1)
10953  .addReg(VReg1)
10954  .add(predOps(ARMCC::AL));
10955  }
10956 
10957  BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
10958  .addMBB(TrapBB)
10959  .addImm(ARMCC::HI)
10960  .addReg(ARM::CPSR);
10961 
// Scale the index by 4 (jump-table entry size), add the table base, and
// load the destination address.
10962  Register NewVReg2 = MRI->createVirtualRegister(TRC);
10963  BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
10964  .addReg(ARM::CPSR, RegState::Define)
10965  .addReg(NewVReg1)
10966  .addImm(2)
10967  .add(predOps(ARMCC::AL));
10968 
10969  Register NewVReg3 = MRI->createVirtualRegister(TRC);
10970  BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
10971  .addJumpTableIndex(MJTI)
10972  .add(predOps(ARMCC::AL));
10973 
10974  Register NewVReg4 = MRI->createVirtualRegister(TRC);
10975  BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
10976  .addReg(ARM::CPSR, RegState::Define)
10977  .addReg(NewVReg2, RegState::Kill)
10978  .addReg(NewVReg3)
10979  .add(predOps(ARMCC::AL));
10980 
10981  MachineMemOperand *JTMMOLd =
10982  MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
// NOTE(review): the trailing mem-operand arguments (original line 10983)
// are elided from this listing.
10984 
10985  Register NewVReg5 = MRI->createVirtualRegister(TRC);
10986  BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
10987  .addReg(NewVReg4, RegState::Kill)
10988  .addImm(0)
10989  .addMemOperand(JTMMOLd)
10990  .add(predOps(ARMCC::AL));
10991 
// PIC jump tables hold offsets rather than absolute addresses, so add the
// table base back in.
10992  unsigned NewVReg6 = NewVReg5;
10993  if (IsPositionIndependent) {
10994  NewVReg6 = MRI->createVirtualRegister(TRC);
10995  BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
10996  .addReg(ARM::CPSR, RegState::Define)
10997  .addReg(NewVReg5, RegState::Kill)
10998  .addReg(NewVReg3)
10999  .add(predOps(ARMCC::AL));
11000  }
11001 
11002  BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11003  .addReg(NewVReg6, RegState::Kill)
11004  .addJumpTableIndex(MJTI);
11005  } else {
11006  Register NewVReg1 = MRI->createVirtualRegister(TRC);
11007  BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11008  .addFrameIndex(FI)
11009  .addImm(4)
11010  .addMemOperand(FIMMOLd)
11011  .add(predOps(ARMCC::AL));
11012 
11013  if (NumLPads < 256) {
11014  BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11015  .addReg(NewVReg1)
11016  .addImm(NumLPads)
11017  .add(predOps(ARMCC::AL));
11018  } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11019  Register VReg1 = MRI->createVirtualRegister(TRC);
11020  BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11021  .addImm(NumLPads & 0xFFFF)
11022  .add(predOps(ARMCC::AL));
11023 
11024  unsigned VReg2 = VReg1;
11025  if ((NumLPads & 0xFFFF0000) != 0) {
11026  VReg2 = MRI->createVirtualRegister(TRC);
11027  BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11028  .addReg(VReg1)
11029  .addImm(NumLPads >> 16)
11030  .add(predOps(ARMCC::AL));
11031  }
11032 
11033  BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11034  .addReg(NewVReg1)
11035  .addReg(VReg2)
11036  .add(predOps(ARMCC::AL));
11037  } else {
11038  MachineConstantPool *ConstantPool = MF->getConstantPool();
11039  Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11040  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11041 
11042  // MachineConstantPool wants an explicit alignment.
11043  Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11044  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11045 
11046  Register VReg1 = MRI->createVirtualRegister(TRC);
11047  BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11048  .addReg(VReg1, RegState::Define)
11049  .addConstantPoolIndex(Idx)
11050  .addImm(0)
11051  .add(predOps(ARMCC::AL));
11052  BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11053  .addReg(NewVReg1)
11054  .addReg(VReg1, RegState::Kill)
11055  .add(predOps(ARMCC::AL));
11056  }
11057 
11058  BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11059  .addMBB(TrapBB)
11060  .addImm(ARMCC::HI)
11061  .addReg(ARM::CPSR);
11062 
11063  Register NewVReg3 = MRI->createVirtualRegister(TRC);
11064  BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11065  .addReg(NewVReg1)
// NOTE(review): the shift-operand immediate of the MOVsi (original line
// 11066) is elided from this listing.
11067  .add(predOps(ARMCC::AL))
11068  .add(condCodeOp());
11069  Register NewVReg4 = MRI->createVirtualRegister(TRC);
11070  BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11071  .addJumpTableIndex(MJTI)
11072  .add(predOps(ARMCC::AL));
11073 
11074  MachineMemOperand *JTMMOLd =
11075  MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
// NOTE(review): the trailing mem-operand arguments (original line 11076)
// are elided from this listing.
11077  Register NewVReg5 = MRI->createVirtualRegister(TRC);
11078  BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11079  .addReg(NewVReg3, RegState::Kill)
11080  .addReg(NewVReg4)
11081  .addImm(0)
11082  .addMemOperand(JTMMOLd)
11083  .add(predOps(ARMCC::AL));
11084 
11085  if (IsPositionIndependent) {
11086  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11087  .addReg(NewVReg5, RegState::Kill)
11088  .addReg(NewVReg4)
11089  .addJumpTableIndex(MJTI);
11090  } else {
11091  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11092  .addReg(NewVReg5, RegState::Kill)
11093  .addJumpTableIndex(MJTI);
11094  }
11095  }
11096 
11097  // Add the jump table entries as successors to the MBB.
// NOTE(review): the SeenMBBs set declaration (original line 11098) is
// elided; it deduplicates landing pads appearing multiple times.
11099  for (MachineBasicBlock *CurMBB : LPadList) {
11100  if (SeenMBBs.insert(CurMBB).second)
11101  DispContBB->addSuccessor(CurMBB);
11102  }
11103 
11104  // N.B. the order the invoke BBs are processed in doesn't matter here.
11105  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
// NOTE(review): the MBBLPads vector declaration (original line 11106) is
// elided from this listing.
11107  for (MachineBasicBlock *BB : InvokeBBs) {
11108 
11109  // Remove the landing pad successor from the invoke block and replace it
11110  // with the new dispatch block.
11111  SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11112  while (!Successors.empty()) {
11113  MachineBasicBlock *SMBB = Successors.pop_back_val();
11114  if (SMBB->isEHPad()) {
11115  BB->removeSuccessor(SMBB);
11116  MBBLPads.push_back(SMBB);
11117  }
11118  }
11119 
11120  BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11121  BB->normalizeSuccProbs();
11122 
11123  // Find the invoke call and mark all of the callee-saved registers as
11124  // 'implicit defined' so that they're spilled. This prevents code from
11125  // moving instructions to before the EH block, where they will never be
11126  // executed.
// NOTE(review): the reverse-iterator loop header start (original line
// 11127) is elided; the loop scans the block backwards for the last call.
11128  II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11129  if (!II->isCall()) continue;
11130 
11131  DenseMap<unsigned, bool> DefRegs;
// NOTE(review): the operand-iterator loop header start (original line
// 11132) is elided from this listing.
11133  OI = II->operands_begin(), OE = II->operands_end();
11134  OI != OE; ++OI) {
11135  if (!OI->isReg()) continue;
11136  DefRegs[OI->getReg()] = true;
11137  }
11138 
11139  MachineInstrBuilder MIB(*MF, &*II);
11140 
11141  for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11142  unsigned Reg = SavedRegs[i];
// Only consider CSRs usable in the current ISA mode.
11143  if (Subtarget->isThumb2() &&
11144  !ARM::tGPRRegClass.contains(Reg) &&
11145  !ARM::hGPRRegClass.contains(Reg))
11146  continue;
11147  if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11148  continue;
11149  if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11150  continue;
11151  if (!DefRegs[Reg])
// NOTE(review): the body adding the implicit-def register operand to MIB
// (original line 11152) is elided from this listing.
11153  }
11154 
11155  break;
11156  }
11157  }
11158 
11159  // Mark all former landing pads as non-landing pads. The dispatch is the only
11160  // landing pad now.
11161  for (MachineBasicBlock *MBBLPad : MBBLPads)
11162  MBBLPad->setIsEHPad(false);
11163 
11164  // The instruction is gone now.
11165  MI.eraseFromParent();
11166 }
11167 
11168 static
11170  for (MachineBasicBlock *S : MBB->successors())
11171  if (S != Succ)
11172  return S;
11173  llvm_unreachable("Expecting a BB with two successors!");
11174 }
11175 
11176 /// Return the load opcode for a given load size. If load size >= 8,
11177 /// neon opcode will be returned.
11178 static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11179  if (LdSize >= 8)
11180  return LdSize == 16 ? ARM::VLD1q32wb_fixed
11181  : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11182  if (IsThumb1)
11183  return LdSize == 4 ? ARM::tLDRi
11184  : LdSize == 2 ? ARM::tLDRHi
11185  : LdSize == 1 ? ARM::tLDRBi : 0;
11186  if (IsThumb2)
11187  return LdSize == 4 ? ARM::t2LDR_POST
11188  : LdSize == 2 ? ARM::t2LDRH_POST
11189  : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11190  return LdSize == 4 ? ARM::LDR_POST_IMM
11191  : LdSize == 2 ? ARM::LDRH_POST
11192  : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11193 }
11194 
11195 /// Return the store opcode for a given store size. If store size >= 8,
11196 /// neon opcode will be returned.
11197 static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11198  if (StSize >= 8)
11199  return StSize == 16 ? ARM::VST1q32wb_fixed
11200  : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11201  if (IsThumb1)
11202  return StSize == 4 ? ARM::tSTRi
11203  : StSize == 2 ? ARM::tSTRHi
11204  : StSize == 1 ? ARM::tSTRBi : 0;
11205  if (IsThumb2)
11206  return StSize == 4 ? ARM::t2STR_POST
11207  : StSize == 2 ? ARM::t2STRH_POST
11208  : StSize == 1 ? ARM::t2STRB_POST : 0;
11209  return StSize == 4 ? ARM::STR_POST_IMM
11210  : StSize == 2 ? ARM::STRH_POST
11211  : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11212 }
11213 
11214 /// Emit a post-increment load operation with given size. The instructions
11215 /// will be added to BB at Pos.
11217  const TargetInstrInfo *TII, const DebugLoc &dl,
11218  unsigned LdSize, unsigned Data, unsigned AddrIn,
11219  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11220  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11221  assert(LdOpc != 0 && "Should have a load opcode");
11222  if (LdSize >= 8) {
11223  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11224  .addReg(AddrOut, RegState::Define)
11225  .addReg(AddrIn)
11226  .addImm(0)
11227  .add(predOps(ARMCC::AL));
11228  } else if (IsThumb1) {
11229  // load + update AddrIn
11230  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11231  .addReg(AddrIn)
11232  .addImm(0)
11233  .add(predOps(ARMCC::AL));
11234  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11235  .add(t1CondCodeOp())
11236  .addReg(AddrIn)
11237  .addImm(LdSize)
11238  .add(predOps(ARMCC::AL));
11239  } else if (IsThumb2) {
11240  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11241  .addReg(AddrOut, RegState::Define)
11242  .addReg(AddrIn)
11243  .addImm(LdSize)
11244  .add(predOps(ARMCC::AL));
11245  } else { // arm
11246  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11247  .addReg(AddrOut, RegState::Define)
11248  .addReg(AddrIn)
11249  .addReg(0)
11250  .addImm(LdSize)
11251  .add(predOps(ARMCC::AL));
11252  }
11253 }
11254 
11255 /// Emit a post-increment store operation with given size. The instructions
11256 /// will be added to BB at Pos.
11258  const TargetInstrInfo *TII, const DebugLoc &dl,
11259  unsigned StSize, unsigned Data, unsigned AddrIn,
11260  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11261  unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11262  assert(StOpc != 0 && "Should have a store opcode");
11263  if (StSize >= 8) {
11264  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11265  .addReg(AddrIn)
11266  .addImm(0)
11267  .addReg(Data)
11268  .add(predOps(ARMCC::AL));
11269  } else if (IsThumb1) {
11270  // store + update AddrIn
11271  BuildMI(*BB, Pos, dl, TII->get(StOpc))
11272  .addReg(Data)
11273  .addReg(AddrIn)
11274  .addImm(0)
11275  .add(predOps(ARMCC::AL));
11276  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11277  .add(t1CondCodeOp())
11278  .addReg(AddrIn)
11279  .addImm(StSize)
11280  .add(predOps(ARMCC::AL));
11281  } else if (IsThumb2) {
11282  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11283  .addReg(Data)
11284  .addReg(AddrIn)
11285  .addImm(StSize)
11286  .add(predOps(ARMCC::AL));
11287  } else { // arm
11288  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11289  .addReg(Data)
11290  .addReg(AddrIn)
11291  .addReg(0)
11292  .addImm(StSize)
11293  .add(predOps(ARMCC::AL));
11294  }
11295 }
11296 
// Custom-inserter expansion for the COPY_STRUCT_BYVAL_I32 pseudo.
// Small copies (SizeVal <= getMaxInlineSizeThreshold()) are fully unrolled
// as post-incremented load/store pairs; larger copies are expanded into an
// explicit count-down loop.  Either way a byte-wise epilogue copies the
// tail that is not a multiple of the chosen unit size.
// NOTE(review): the comment below says "3 operands", but operand 3
// (alignment) is also read — the comment undercounts.
11298 ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11299  MachineBasicBlock *BB) const {
11300  // This pseudo instruction has 3 operands: dst, src, size
11301  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11302  // Otherwise, we will generate unrolled scalar copies.
11303  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11304  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11305  MachineFunction::iterator It = ++BB->getIterator();
11306 
11307  Register dest = MI.getOperand(0).getReg();
11308  Register src = MI.getOperand(1).getReg();
11309  unsigned SizeVal = MI.getOperand(2).getImm();
11310  unsigned Alignment = MI.getOperand(3).getImm();
11311  DebugLoc dl = MI.getDebugLoc();
11312 
11313  MachineFunction *MF = BB->getParent();
11315  unsigned UnitSize = 0;
11316  const TargetRegisterClass *TRC = nullptr;
11317  const TargetRegisterClass *VecTRC = nullptr;
11318 
11319  bool IsThumb1 = Subtarget->isThumb1Only();
11320  bool IsThumb2 = Subtarget->isThumb2();
11321  bool IsThumb = Subtarget->isThumb();
11322 
// Pick the widest copy unit the alignment allows: 1 or 2 bytes for odd /
// halfword alignment, otherwise 16/8-byte NEON units when NEON is present
// and implicit FP use is permitted, falling back to 4-byte words.
11323  if (Alignment & 1) {
11324  UnitSize = 1;
11325  } else if (Alignment & 2) {
11326  UnitSize = 2;
11327  } else {
11328  // Check whether we can use NEON instructions.
11329  if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11330  Subtarget->hasNEON()) {
11331  if ((Alignment % 16 == 0) && SizeVal >= 16)
11332  UnitSize = 16;
11333  else if ((Alignment % 8 == 0) && SizeVal >= 8)
11334  UnitSize = 8;
11335  }
11336  // Can't use NEON instructions.
11337  if (UnitSize == 0)
11338  UnitSize = 4;
11339  }
11340 
11341  // Select the correct opcode and register class for unit size load/store
11342  bool IsNeon = UnitSize >= 8;
11343  TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11344  if (IsNeon)
11345  VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11346  : UnitSize == 8 ? &ARM::DPRRegClass
11347  : nullptr;
11348 
11349  unsigned BytesLeft = SizeVal % UnitSize;
11350  unsigned LoopSize = SizeVal - BytesLeft;
11351 
// --- Unrolled path: one load/store pair per unit, chained through fresh
// --- address vregs so each post-increment feeds the next iteration.
11352  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11353  // Use LDR and STR to copy.
11354  // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11355  // [destOut] = STR_POST(scratch, destIn, UnitSize)
11356  unsigned srcIn = src;
11357  unsigned destIn = dest;
11358  for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11359  Register srcOut = MRI.createVirtualRegister(TRC);
11360  Register destOut = MRI.createVirtualRegister(TRC);
11361  Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11362  emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11363  IsThumb1, IsThumb2);
11364  emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11365  IsThumb1, IsThumb2);
11366  srcIn = srcOut;
11367  destIn = destOut;
11368  }
11369 
11370  // Handle the leftover bytes with LDRB and STRB.
11371  // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11372  // [destOut] = STRB_POST(scratch, destIn, 1)
11373  for (unsigned i = 0; i < BytesLeft; i++) {
11374  Register srcOut = MRI.createVirtualRegister(TRC);
11375  Register destOut = MRI.createVirtualRegister(TRC);
11376  Register scratch = MRI.createVirtualRegister(TRC);
11377  emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11378  IsThumb1, IsThumb2);
11379  emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11380  IsThumb1, IsThumb2);
11381  srcIn = srcOut;
11382  destIn = destOut;
11383  }
11384  MI.eraseFromParent(); // The instruction is gone now.
11385  return BB;
11386  }
11387 
11388  // Expand the pseudo op to a loop.
11389  // thisMBB:
11390  // ...
11391  // movw varEnd, # --> with thumb2
11392  // movt varEnd, #
11393  // ldrcp varEnd, idx --> without thumb2
11394  // fallthrough --> loopMBB
11395  // loopMBB:
11396  // PHI varPhi, varEnd, varLoop
11397  // PHI srcPhi, src, srcLoop
11398  // PHI destPhi, dst, destLoop
11399  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11400  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11401  // subs varLoop, varPhi, #UnitSize
11402  // bne loopMBB
11403  // fallthrough --> exitMBB
11404  // exitMBB:
11405  // epilogue to handle left-over bytes
11406  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11407  // [destOut] = STRB_POST(scratch, destLoop, 1)
11408  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11409  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11410  MF->insert(It, loopMBB);
11411  MF->insert(It, exitMBB);
11412 
11413  // Transfer the remainder of BB and its successor edges to exitMBB.
11414  exitMBB->splice(exitMBB->begin(), BB,
11415  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11417 
11418  // Load an immediate to varEnd.
11419  Register varEnd = MRI.createVirtualRegister(TRC);
11420  if (Subtarget->useMovt()) {
// With MOVW/MOVT available: MOVW loads the low half.  If the size has
// high bits set, MOVW defines a temporary vreg and MOVT merges the high
// half into varEnd; otherwise MOVW defines varEnd directly.
11421  unsigned Vtmp = varEnd;
11422  if ((LoopSize & 0xFFFF0000) != 0)
11423  Vtmp = MRI.createVirtualRegister(TRC);
11424  BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
11425  .addImm(LoopSize & 0xFFFF)
11426  .add(predOps(ARMCC::AL));
11427 
11428  if ((LoopSize & 0xFFFF0000) != 0)
11429  BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
11430  .addReg(Vtmp)
11431  .addImm(LoopSize >> 16)
11432  .add(predOps(ARMCC::AL));
11433  } else {
// No MOVT: materialize the loop count via a constant-pool load instead.
11436  const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11437 
11438  // MachineConstantPool wants an explicit alignment.
11439  Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11440  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11441  MachineMemOperand *CPMMO =
11444 
11445  if (IsThumb)
11446  BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11447  .addReg(varEnd, RegState::Define)
11448  .addConstantPoolIndex(Idx)
11449  .add(predOps(ARMCC::AL))
11450  .addMemOperand(CPMMO);
11451  else
11452  BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11453  .addReg(varEnd, RegState::Define)
11454  .addConstantPoolIndex(Idx)
11455  .addImm(0)
11456  .add(predOps(ARMCC::AL))
11457  .addMemOperand(CPMMO);
11458  }
11459  BB->addSuccessor(loopMBB);
11460 
11461  // Generate the loop body:
11462  // varPhi = PHI(varLoop, varEnd)
11463  // srcPhi = PHI(srcLoop, src)
11464  // destPhi = PHI(destLoop, dst)
11465  MachineBasicBlock *entryBB = BB;
11466  BB = loopMBB;
11467  Register varLoop = MRI.createVirtualRegister(TRC);
11468  Register varPhi = MRI.createVirtualRegister(TRC);
11469  Register srcLoop = MRI.createVirtualRegister(TRC);
11470  Register srcPhi = MRI.createVirtualRegister(TRC);
11471  Register destLoop = MRI.createVirtualRegister(TRC);
11472  Register destPhi = MRI.createVirtualRegister(TRC);
11473 
11474  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11475  .addReg(varLoop).addMBB(loopMBB)
11476  .addReg(varEnd).addMBB(entryBB);
11477  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11478  .addReg(srcLoop).addMBB(loopMBB)
11479  .addReg(src).addMBB(entryBB);
11480  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11481  .addReg(destLoop).addMBB(loopMBB)
11482  .addReg(dest).addMBB(entryBB);
11483 
11484  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11485  // [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
11486  Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11487  emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11488  IsThumb1, IsThumb2);
11489  emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11490  IsThumb1, IsThumb2);
11491 
11492  // Decrement loop variable by UnitSize.
11493  if (IsThumb1) {
// Thumb1 tSUBi8 always sets flags through its tied CPSR operand
// (t1CondCodeOp); the conditional branch below consumes those flags.
11494  BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11495  .add(t1CondCodeOp())
11496  .addReg(varPhi)
11497  .addImm(UnitSize)
11498  .add(predOps(ARMCC::AL));
11499  } else {
11500  MachineInstrBuilder MIB =
11501  BuildMI(*BB, BB->end(), dl,
11502  TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11503  MIB.addReg(varPhi)
11504  .addImm(UnitSize)
11505  .add(predOps(ARMCC::AL))
11506  .add(condCodeOp());
// Rewrite the optional cc_out operand (index 5) to a CPSR def so the
// BNE below actually sees the subtraction's flags.
11507  MIB->getOperand(5).setReg(ARM::CPSR);
11508  MIB->getOperand(5).setIsDef(true);
11509  }
11510  BuildMI(*BB, BB->end(), dl,
11511  TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11512  .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11513 
11514  // loopMBB can loop back to loopMBB or fall through to exitMBB.
11515  BB->addSuccessor(loopMBB);
11516  BB->addSuccessor(exitMBB);
11517 
11518  // Add epilogue to handle BytesLeft.
11519  BB = exitMBB;
11520  auto StartOfExit = exitMBB->begin();
11521 
11522  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11523  // [destOut] = STRB_POST(scratch, destLoop, 1)
11524  unsigned srcIn = srcLoop;
11525  unsigned destIn = destLoop;
11526  for (unsigned i = 0; i < BytesLeft; i++) {
11527  Register srcOut = MRI.createVirtualRegister(TRC);
11528  Register destOut = MRI.createVirtualRegister(TRC);
11529  Register scratch = MRI.createVirtualRegister(TRC);
11530  emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11531  IsThumb1, IsThumb2);
11532  emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11533  IsThumb1, IsThumb2);
11534  srcIn = srcOut;
11535  destIn = destOut;
11536  }
11537 
11538  MI.eraseFromParent(); // The instruction is gone now.
11539  return BB;
11540 }
11541 
// Custom-inserter for the WIN__CHKSTK pseudo (Windows-on-ARM stack
// probing).  Emits a call to __chkstk — which, per the block comment
// below, takes the word count in R4 and returns the byte adjustment in
// R4 — and then subtracts that adjustment from SP.  Thumb-2 only.
11543 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11544  MachineBasicBlock *MBB) const {
11545  const TargetMachine &TM = getTargetMachine();
11546  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11547  DebugLoc DL = MI.getDebugLoc();
11548 
11549  assert(Subtarget->isTargetWindows() &&
11550  "__chkstk is only supported on Windows");
11551  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11552 
11553  // __chkstk takes the number of words to allocate on the stack in R4, and
11554  // returns the stack adjustment in number of bytes in R4. This will not
11555  // clobber any other registers (other than the obvious lr).
11556  //
11557  // Although, technically, IP should be considered a register which may be
11558  // clobbered, the call itself will not touch it. Windows on ARM is a pure
11559  // thumb-2 environment, so there is no interworking required. As a result, we
11560  // do not expect a veneer to be emitted by the linker, clobbering IP.
11561  //
11562  // Each module receives its own copy of __chkstk, so no import thunk is
11563  // required, again, ensuring that IP is not clobbered.
11564  //
11565  // Finally, although some linkers may theoretically provide a trampoline for
11566  // out of range calls (which is quite common due to a 32M range limitation of
11567  // branches for Thumb), we can generate the long-call version via
11568  // -mcmodel=large, alleviating the need for the trampoline which may clobber
11569  // IP.
11570 
// Small/medium/kernel code models call __chkstk directly with tBL; the
// large model first materializes the symbol address with t2MOVi32imm
// and calls through the register (tBLX variant).
11571  switch (TM.getCodeModel()) {
11572  case CodeModel::Tiny:
11573  llvm_unreachable("Tiny code model not available on ARM.");
11574  case CodeModel::Small:
11575  case CodeModel::Medium:
11576  case CodeModel::Kernel:
11577  BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11578  .add(predOps(ARMCC::AL))
11579  .addExternalSymbol("__chkstk")
11582  .addReg(ARM::R12,
11584  .addReg(ARM::CPSR,
11586  break;
11587  case CodeModel::Large: {
11589  Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11590 
11591  BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11592  .addExternalSymbol("__chkstk");
11593  BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
11594  .add(predOps(ARMCC::AL))
11598  .addReg(ARM::R12,
11600  .addReg(ARM::CPSR,
11602  break;
11603  }
11604  }
11605 
// Apply the probed adjustment: SP = SP - R4 (R4 per the ABI comment above).
11606  BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11607  .addReg(ARM::SP, RegState::Kill)
11610  .add(predOps(ARMCC::AL))
11611  .add(condCodeOp());
11612 
11613  MI.eraseFromParent();
11614  return MBB;
11615 }
11616 
// Custom-inserter for the WIN__DBZCHK pseudo (Windows divide-by-zero
// check).  Compares the operand register against zero; if it is zero,
// branches to a trap block containing t__brkdiv0.  The rest of the
// original block is spliced into ContBB, which is returned so expansion
// continues there.
11618 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11619  MachineBasicBlock *MBB) const {
11620  DebugLoc DL = MI.getDebugLoc();
11621  MachineFunction *MF = MBB->getParent();
11622  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11623 
// Fall-through continuation: receives all instructions after MI plus
// MBB's old successor edges.
11625  MF->insert(++MBB->getIterator(), ContBB);
11626  ContBB->splice(ContBB->begin(), MBB,
11627  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11629  MBB->addSuccessor(ContBB);
11630 
// Trap block: a single t__brkdiv0, reached only when the divisor is 0.
11632  BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11633  MF->push_back(TrapBB);
11634  MBB->addSuccessor(TrapBB);
11635 
11636  BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11637  .addReg(MI.getOperand(0).getReg())
11638  .addImm(0)
11639  .add(predOps(ARMCC::AL));
11640  BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11641  .addMBB(TrapBB)
11642  .addImm(ARMCC::EQ)
11643  .addReg(ARM::CPSR);
11644 
11645  MI.eraseFromParent();
11646  return ContBB;
11648 
11649 // The CPSR operand of SelectItr might be missing a kill marker
11650 // because there were multiple uses of CPSR, and ISel didn't know
11651 // which to mark. Figure out whether SelectItr should have had a
11652 // kill marker, and set it if it should. Returns the correct kill
11653 // marker value.
// Returns false (CPSR stays live) when a later instruction in BB reads
// CPSR before any redefinition, or when CPSR is live into a successor;
// otherwise adds the kill flag on SelectItr's CPSR operand and returns
// true.
11656  const TargetRegisterInfo* TRI) {
11657  // Scan forward through BB for a use/def of CPSR.
11658  MachineBasicBlock::iterator miI(std::next(SelectItr));
11659  for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11660  const MachineInstr& mi = *miI;
// A use before any def means CPSR is live past SelectItr — no kill flag.
11661  if (mi.readsRegister(ARM::CPSR))
11662  return false;
11663  if (mi.definesRegister(ARM::CPSR))
11664  break; // Should have kill-flag - update below.
11665  }
11666 
11667  // If we hit the end of the block, check whether CPSR is live into a
11668  // successor.
11669  if (miI == BB->end()) {
11670  for (MachineBasicBlock *Succ : BB->successors())
11671  if (Succ->isLiveIn(ARM::CPSR))
11672  return false;
11673  }
11674 
11675  // We found a def, or hit the end of the basic block and CPSR wasn't live
11676  // out. SelectMI should have a kill flag on CPSR.
11677  SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11678  return true;
11679 }
11680 
11681 /// Adds logic in loop entry MBB to calculate loop iteration count and adds
11682 /// t2WhileLoopSetup and t2WhileLoopStart to generate WLS loop
/// Computes iterations = ceil(size / 16) as (size + 15) >> 4, then emits
/// t2WhileLoopSetup / t2WhileLoopStart (branching to \p TpExit when the
/// count is zero) and an unconditional t2B to \p TpLoopBody.
/// \param OpSizeReg vreg holding the copy/set size in bytes.
/// \returns GPRlr-class vreg holding the total iteration count.
11684  MachineBasicBlock *TpLoopBody,
11685  MachineBasicBlock *TpExit, Register OpSizeReg,
11686  const TargetInstrInfo *TII, DebugLoc Dl,
11688  // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
11689  Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11690  BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11691  .addUse(OpSizeReg)
11692  .addImm(15)
11693  .add(predOps(ARMCC::AL))
11694  .addReg(0);
11695 
11696  Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11697  BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11698  .addUse(AddDestReg, RegState::Kill)
11699  .addImm(4)
11700  .add(predOps(ARMCC::AL))
11701  .addReg(0);
11702 
11703  Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11704  BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11705  .addUse(LsrDestReg, RegState::Kill);
11706 
// t2WhileLoopStart branches to TpExit when the iteration count is zero.
11707  BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11708  .addUse(TotalIterationsReg)
11709  .addMBB(TpExit);
11710 
11711  BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11712  .addMBB(TpLoopBody)
11713  .add(predOps(ARMCC::AL));
11714 
11715  return TotalIterationsReg;
11716 }
11717 
11718 /// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11719 /// t2DoLoopEnd. These are used by later passes to generate tail predicated
11720 /// loops.
/// Emits four PHIs (src pointer — memcpy only, dest pointer, loop counter,
/// predication counter), an MVE_VCTP8 whose result predicates the
/// post-incremented VLDRBU8/VSTRBU8 below, and the t2LoopDec/t2LoopEnd
/// pseudos that close the loop.  When \p IsMemcpy is false the value
/// stored comes directly from \p OpSrcReg (memset splat value).
11721 static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11722  MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11723  const TargetInstrInfo *TII, DebugLoc Dl,
11724  MachineRegisterInfo &MRI, Register OpSrcReg,
11725  Register OpDestReg, Register ElementCountReg,
11726  Register TotalIterationsReg, bool IsMemcpy) {
11727  // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11728  // array, loop iteration counter, predication counter.
11729 
11730  Register SrcPhiReg, CurrSrcReg;
11731  if (IsMemcpy) {
11732  // Current position in the src array
11733  SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11734  CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11735  BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11736  .addUse(OpSrcReg)
11737  .addMBB(TpEntry)
11738  .addUse(CurrSrcReg)
11739  .addMBB(TpLoopBody);
11740  }
11741 
11742  // Current position in the dest array
11743  Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11744  Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11745  BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11746  .addUse(OpDestReg)
11747  .addMBB(TpEntry)
11748  .addUse(CurrDestReg)
11749  .addMBB(TpLoopBody);
11750 
11751  // Current loop counter
11752  Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11753  Register RemainingLoopIterationsReg =
11754  MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11755  BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11756  .addUse(TotalIterationsReg)
11757  .addMBB(TpEntry)
11758  .addUse(RemainingLoopIterationsReg)
11759  .addMBB(TpLoopBody);
11760 
11761  // Predication counter
11762  Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11763  Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11764  BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11765  .addUse(ElementCountReg)
11766  .addMBB(TpEntry)
11767  .addUse(RemainingElementsReg)
11768  .addMBB(TpLoopBody);
11769 
11770  // Pass predication counter to VCTP
// VccrReg is consumed as the predicate of the VLDRB/VSTRB below.
11771  Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11772  BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11773  .addUse(PredCounterPhiReg)
11775  .addReg(0)
11776  .addReg(0);
11777 
// Decrement the predication counter by 16 (one full vector of bytes).
11778  BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11779  .addUse(PredCounterPhiReg)
11780  .addImm(16)
11781  .add(predOps(ARMCC::AL))
11782  .addReg(0);
11783 
11784  // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11785  Register SrcValueReg;
11786  if (IsMemcpy) {
11787  SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11788  BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11789  .addDef(CurrSrcReg)
11790  .addDef(SrcValueReg)
11791  .addReg(SrcPhiReg)
11792  .addImm(16)
11794  .addUse(VccrReg)
11795  .addReg(0);
11796  } else
11797  SrcValueReg = OpSrcReg;
11798 
11799  BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11800  .addDef(CurrDestReg)
11801  .addUse(SrcValueReg)
11802  .addReg(DestPhiReg)
11803  .addImm(16)
11805  .addUse(VccrReg)
11806  .addReg(0);
11807 
11808  // Add the pseudoInstrs for decrementing the loop counter and marking the
11809  // end:t2DoLoopDec and t2DoLoopEnd
11810  BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11811  .addUse(LoopCounterPhiReg)
11812  .addImm(1);
11813 
11814  BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11815  .addUse(RemainingLoopIterationsReg)
11816  .addMBB(TpLoopBody);
11817 
11818  BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11819  .addMBB(TpExit)
11820  .add(predOps(ARMCC::AL));
11821 }
11822 
// Target hook: expand pseudo-instructions that were marked
// usesCustomInserter during isel.  Each case either rewrites MI in
// place (setDesc) or builds the replacement MIR, erases MI, and
// returns the basic block in which expansion should continue.
11825  MachineBasicBlock *BB) const {
11826  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11827  DebugLoc dl = MI.getDebugLoc();
11828  bool isThumb2 = Subtarget->isThumb2();
11829  switch (MI.getOpcode()) {
11830  default: {
11831  MI.print(errs());
11832  llvm_unreachable("Unexpected instr type to insert");
11833  }
11834 
11835  // Thumb1 post-indexed loads are really just single-register LDMs.
11836  case ARM::tLDR_postidx: {
11837  MachineOperand Def(MI.getOperand(1));
11838  BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
11839  .add(Def) // Rn_wb
11840  .add(MI.getOperand(2)) // Rn
11841  .add(MI.getOperand(3)) // PredImm
11842  .add(MI.getOperand(4)) // PredReg
11843  .add(MI.getOperand(0)) // Rt
11844  .cloneMemRefs(MI);
11845  MI.eraseFromParent();
11846  return BB;
11847  }
11848 
11849  case ARM::MVE_MEMCPYLOOPINST:
11850  case ARM::MVE_MEMSETLOOPINST: {
11851 
11852  // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
11853  // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
11854  // the iteration count =ceil(size_in_bytes/16)) in the TP entry block and
11855  // adds the relevant instructions in the TP loop Body for generation of a
11856  // WLSTP loop.
11857 
11858  // Below is relevant portion of the CFG after the transformation.
11859  // The Machine Basic Blocks are shown along with branch conditions (in
11860  // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
11861  // portion of the CFG and may not necessarily be the entry/exit of the
11862  // function.
11863 
11864  // (Relevant) CFG after transformation:
11865  // TP entry MBB
11866  // |
11867  // |-----------------|
11868  // (n <= 0) (n > 0)
11869  // | |
11870  // | TP loop Body MBB<--|
11871  // | | |
11872  // \ |___________|
11873  // \ /
11874  // TP exit MBB
11875 
11876  MachineFunction *MF = BB->getParent();
11877  MachineFunctionProperties &Properties = MF->getProperties();
11879 
11880  Register OpDestReg = MI.getOperand(0).getReg();
11881  Register OpSrcReg = MI.getOperand(1).getReg();
11882  Register OpSizeReg = MI.getOperand(2).getReg();
11883 
11884  // Allocate the required MBBs and add to parent function.
11885  MachineBasicBlock *TpEntry = BB;
11886  MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
11887  MachineBasicBlock *TpExit;
11888 
11889  MF->push_back(TpLoopBody);
11890 
11891  // If any instructions are present in the current block after
11892  // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
11893  // move the instructions into the newly created exit block. If there are no
11894  // instructions add an explicit branch to the FallThrough block and then
11895  // split.
11896  //
11897  // The split is required for two reasons:
11898  // 1) A terminator(t2WhileLoopStart) will be placed at that site.
11899  // 2) Since a TPLoopBody will be added later, any phis in successive blocks
11900  // need to be updated. splitAt() already handles this.
11901  TpExit = BB->splitAt(MI, false);
11902  if (TpExit == BB) {
11903  assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
11904  "block containing memcpy/memset Pseudo");
11905  TpExit = BB->getFallThrough();
11906  BuildMI(BB, dl, TII->get(ARM::t2B))
11907  .addMBB(TpExit)
11908  .add(predOps(ARMCC::AL));
11909  TpExit = BB->splitAt(MI, false);
11910  }
11911 
11912  // Add logic for iteration count
11913  Register TotalIterationsReg =
11914  genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
11915 
11916  // Add the vectorized (and predicated) loads/store instructions
11917  bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
11918  genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
11919  OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
11920 
11921  // Required to avoid conflict with the MachineVerifier during testing.
11923 
11924  // Connect the blocks
11925  TpEntry->addSuccessor(TpLoopBody);
11926  TpLoopBody->addSuccessor(TpLoopBody);
11927  TpLoopBody->addSuccessor(TpExit);
11928 
11929  // Reorder for a more natural layout
11930  TpLoopBody->moveAfter(TpEntry);
11931  TpExit->moveAfter(TpLoopBody);
11932 
11933  // Finally, remove the memcpy Pseudo Instruction
11934  MI.eraseFromParent();
11935 
11936  // Return the exit block as it may contain other instructions requiring a
11937  // custom inserter
11938  return TpExit;
11939  }
11940 
11941  // The Thumb2 pre-indexed stores have the same MI operands, they just
11942  // define them differently in the .td files from the isel patterns, so
11943  // they need pseudos.
11944  case ARM::t2STR_preidx:
11945  MI.setDesc(TII->get(ARM::t2STR_PRE));
11946  return BB;
11947  case ARM::t2STRB_preidx:
11948  MI.setDesc(TII->get(ARM::t2STRB_PRE));
11949  return BB;
11950  case ARM::t2STRH_preidx:
11951  MI.setDesc(TII->get(ARM::t2STRH_PRE));
11952  return BB;
11953 
11954  case ARM::STRi_preidx:
11955  case ARM::STRBi_preidx: {
11956  unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
11957  : ARM::STRB_PRE_IMM;
11958  // Decode the offset.
11959  unsigned Offset = MI.getOperand(4).getImm();
11960  bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
11962  if (isSub)
11963  Offset = -Offset;
11964 
11965  MachineMemOperand *MMO = *MI.memoperands_begin();
11966  BuildMI(*BB, MI, dl, TII->get(NewOpc))
11967  .add(MI.getOperand(0)) // Rn_wb
11968  .add(MI.getOperand(1)) // Rt
11969  .add(MI.getOperand(2)) // Rn
11970  .addImm(Offset) // offset (skip GPR==zero_reg)
11971  .add(MI.getOperand(5)) // pred
11972  .add(MI.getOperand(6))
11973  .addMemOperand(MMO);
11974  MI.eraseFromParent();
11975  return BB;
11976  }
11977  case ARM::STRr_preidx:
11978  case ARM::STRBr_preidx:
11979  case ARM::STRH_preidx: {
11980  unsigned NewOpc;
11981  switch (MI.getOpcode()) {
11982  default: llvm_unreachable("unexpected opcode!");
11983  case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
11984  case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
11985  case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
11986  }
// Register-offset pre-indexed stores map 1:1 onto the real opcodes;
// copy every operand across unchanged.
11987  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
11988  for (const MachineOperand &MO : MI.operands())
11989  MIB.add(MO);
11990  MI.eraseFromParent();
11991  return BB;
11992  }
11993 
11994  case ARM::tMOVCCr_pseudo: {
11995  // To "insert" a SELECT_CC instruction, we actually have to insert the
11996  // diamond control-flow pattern. The incoming instruction knows the
11997  // destination vreg to set, the condition code register to branch on, the
11998  // true/false values to select between, and a branch opcode to use.
11999  const BasicBlock *LLVM_BB = BB->getBasicBlock();
12000  MachineFunction::iterator It = ++BB->getIterator();
12001 
12002  // thisMBB:
12003  // ...
12004  // TrueVal = ...
12005  // cmpTY ccX, r1, r2
12006  // bCC copy1MBB
12007  // fallthrough --> copy0MBB
12008  MachineBasicBlock *thisMBB = BB;
12009  MachineFunction *F = BB->getParent();
12010  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12011  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12012  F->insert(It, copy0MBB);
12013  F->insert(It, sinkMBB);
12014 
12015  // Check whether CPSR is live past the tMOVCCr_pseudo.
12016  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12017  if (!MI.killsRegister(ARM::CPSR) &&
12018  !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12019  copy0MBB->addLiveIn(ARM::CPSR);
12020  sinkMBB->addLiveIn(ARM::CPSR);
12021  }
12022 
12023  // Transfer the remainder of BB and its successor edges to sinkMBB.
12024  sinkMBB->splice(sinkMBB->begin(), BB,
12025  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12027 
12028  BB->addSuccessor(copy0MBB);
12029  BB->addSuccessor(sinkMBB);
12030 
12031  BuildMI(BB, dl, TII->get(ARM::tBcc))
12032  .addMBB(sinkMBB)
12033  .addImm(MI.getOperand(3).getImm())
12034  .addReg(MI.getOperand(4).getReg());
12035 
12036  // copy0MBB:
12037  // %FalseValue = ...
12038  // # fallthrough to sinkMBB
12039  BB = copy0MBB;
12040 
12041  // Update machine-CFG edges
12042  BB->addSuccessor(sinkMBB);
12043 
12044  // sinkMBB:
12045  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12046  // ...
12047  BB = sinkMBB;
12048  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12049  .addReg(MI.getOperand(1).getReg())
12050  .addMBB(copy0MBB)
12051  .addReg(MI.getOperand(2).getReg())
12052  .addMBB(thisMBB)
12053 
12054  MI.eraseFromParent(); // The pseudo instruction is gone now.
12055  return BB;
12056  }
12057 
12058  case ARM::BCCi64:
12059  case ARM::BCCZi64: {
12060  // If there is an unconditional branch to the other successor, remove it.
12061  BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12062 
12063  // Compare both parts that make up the double comparison separately for
12064  // equality.
12065  bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12066 
12067  Register LHS1 = MI.getOperand(1).getReg();
12068  Register LHS2 = MI.getOperand(2).getReg();
12069  if (RHSisZero) {
12070  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12071  .addReg(LHS1)
12072  .addImm(0)
12073  .add(predOps(ARMCC::AL));
// The second compare is predicated on EQ so it only runs when the first
// halves matched — combined, the flags are EQ iff both halves are equal.
12074  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12075  .addReg(LHS2).addImm(0)
12076  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12077  } else {
12078  Register RHS1 = MI.getOperand(3).getReg();
12079  Register RHS2 = MI.getOperand(4).getReg();
12080  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12081  .addReg(LHS1)
12082  .addReg(RHS1)
12083  .add(predOps(ARMCC::AL));
12084  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12085  .addReg(LHS2).addReg(RHS2)
12086  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12087  }
12088 
12089  MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12090  MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12091  if (MI.getOperand(0).getImm() == ARMCC::NE)
12092  std::swap(destMBB, exitMBB);
12093 
12094  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12095  .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12096  if (isThumb2)
12097  BuildMI(BB, dl, TII->get(ARM::t2B))
12098  .addMBB(exitMBB)
12099  .add(predOps(ARMCC::AL));
12100  else
12101  BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12102 
12103  MI.eraseFromParent(); // The pseudo instruction is gone now.
12104  return BB;
12105  }
12106 
// setjmp pseudos need no block surgery here; they expand elsewhere.
12107  case ARM::Int_eh_sjlj_setjmp:
12108  case ARM::Int_eh_sjlj_setjmp_nofp:
12109  case ARM::tInt_eh_sjlj_setjmp:
12110  case ARM::t2Int_eh_sjlj_setjmp:
12111  case ARM::t2Int_eh_sjlj_setjmp_nofp:
12112  return BB;
12113 
12114  case ARM::Int_eh_sjlj_setup_dispatch:
12115  EmitSjLjDispatchBlock(MI, BB);
12116  return BB;
12117 
12118  case ARM::ABS:
12119  case ARM::t2ABS: {
12120  // To insert an ABS instruction, we have to insert the
12121  // diamond control-flow pattern. The incoming instruction knows the
12122  // source vreg to test against 0, the destination vreg to set,
12123  // the condition code register to branch on, the
12124  // true/false values to select between, and a branch opcode to use.
12125  // It transforms
12126  // V1 = ABS V0
12127  // into
12128  // V2 = MOVS V0
12129  // BCC (branch to SinkBB if V0 >= 0)
12130  // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12131  // SinkBB: V1 = PHI(V2, V3)
12132  const BasicBlock *LLVM_BB = BB->getBasicBlock();
12133  MachineFunction::iterator BBI = ++BB->getIterator();
12134  MachineFunction *Fn = BB->getParent();
12135  MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12136  MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12137  Fn->insert(BBI, RSBBB);
12138  Fn->insert(BBI, SinkBB);
12139 
12140  Register ABSSrcReg = MI.getOperand(1).getReg();
12141  Register ABSDstReg = MI.getOperand(0).getReg();
12142  bool ABSSrcKIll = MI.getOperand(1).isKill();
12143  bool isThumb2 = Subtarget->isThumb2();
12145  // In Thumb mode S must not be specified if source register is the SP or
12146  // PC and if destination register is the SP, so restrict register class
12147  Register NewRsbDstReg = MRI.createVirtualRegister(
12148  isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12149 
12150  // Transfer the remainder of BB and its successor edges to sinkMBB.
12151  SinkBB->splice(SinkBB->begin(), BB,
12152  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12154 
12155  BB->addSuccessor(RSBBB);
12156  BB->addSuccessor(SinkBB);
12157 
12158  // fall through to SinkMBB
12159  RSBBB->addSuccessor(SinkBB);
12160 
12161  // insert a cmp at the end of BB
12162  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12163  .addReg(ABSSrcReg)
12164  .addImm(0)
12165  .add(predOps(ARMCC::AL));
12166 
12167  // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12168  BuildMI(BB, dl,
12169  TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12171 
12172  // insert rsbri in RSBBB
12173  // Note: BCC and rsbri will be converted into predicated rsbmi
12174  // by if-conversion pass
12175  BuildMI(*RSBBB, RSBBB->begin(), dl,
12176  TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12177  .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12178  .addImm(0)
12179  .add(predOps(ARMCC::AL))
12180  .add(condCodeOp());
12181 
12182  // insert PHI in SinkBB,
12183  // reuse ABSDstReg to not change uses of ABS instruction
12184  BuildMI(*SinkBB, SinkBB->begin(), dl,
12185  TII->get(ARM::PHI), ABSDstReg)
12186  .addReg(NewRsbDstReg).addMBB(RSBBB)
12187  .addReg(ABSSrcReg).addMBB(BB);
12188 
12189  // remove ABS instruction
12190  MI.eraseFromParent();
12191 
12192  // return last added BB
12193  return SinkBB;
12194  }
12195  case ARM::COPY_STRUCT_BYVAL_I32:
12196  ++NumLoopByVals;
12197  return EmitStructByval(MI, BB);
12198  case ARM::WIN__CHKSTK:
12199  return EmitLowered__chkstk(MI, BB);
12200  case ARM::WIN__DBZCHK:
12201  return EmitLowered__dbzchk(MI, BB);
12202  }
12203 }
12204 
12205 /// Attaches vregs to MEMCPY that it will use as scratch registers
12206 /// when it is expanded into LDM/STM. This is done as a post-isel lowering
12207 /// instead of as a custom inserter because we need the use list from the SDNode.
12208 static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12209  MachineInstr &MI, const SDNode *Node) {
12210  bool isThumb1 = Subtarget->isThumb1Only();
12211 
12212  DebugLoc DL = MI.getDebugLoc();
12213  MachineFunction *MF = MI.getParent()->getParent();
12215  MachineInstrBuilder MIB(*MF, MI);
12216 
12217  // If the new dst/src is unused mark it as dead.
12218  if (!Node->hasAnyUseOfValue(0)) {
12219  MI.getOperand(0).setIsDead(true);
12220  }
12221  if (!Node->hasAnyUseOfValue(1)) {
12222  MI.getOperand(1).setIsDead(true);
12223  }
12224 
12225  // The MEMCPY both defines and kills the scratch registers.
12226  for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12227  Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12228  : &ARM::GPRRegClass);
12229  MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
12230  }
12231 }
12232 
12234  SDNode *Node) const {
12235  if (MI.getOpcode() == ARM::MEMCPY) {
12236  attachMEMCPYScratchRegs(Subtarget, MI, Node);
12237  return;
12238  }
12239 
12240  const MCInstrDesc *MCID = &MI.getDesc();
12241  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12242  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12243  // operand is still set to noreg. If needed, set the optional operand's
12244  // register to CPSR, and remove the redundant implicit def.
12245  //
12246  // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12247 
12248  // Rename pseudo opcodes.
12249  unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12250  unsigned ccOutIdx;
12251  if (NewOpc) {
12252  const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12253  MCID = &TII->get(NewOpc);
12254 
12255  assert(MCID->getNumOperands() ==
12256  MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12257  && "converted opcode should be the same except for cc_out"
12258  " (and, on Thumb1, pred)");
12259 
12260  MI.setDesc(*MCID);
12261 
12262  // Add the optional cc_out operand
12263  MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12264 
12265  // On Thumb1, move all input operands to the end, then add the predicate
12266  if (Subtarget->isThumb1Only()) {
12267  for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12268  MI.addOperand(MI.getOperand(1));
12269  MI.removeOperand(1);
12270  }
12271 
12272  // Restore the ties
12273  for (unsigned i = MI.getNumOperands(); i--;) {
12274  const MachineOperand& op = MI.getOperand(i);
12275  if (op.isReg() && op.isUse()) {
12276  int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12277  if (DefIdx != -1)
12278  MI.tieOperands(DefIdx, i);
12279  }
12280  }
12281 
12282  MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
12283  MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12284  ccOutIdx = 1;
12285  } else
12286  ccOutIdx = MCID->getNumOperands() - 1;
12287  } else
12288  ccOutIdx = MCID->getNumOperands() - 1;
12289 
12290  // Any ARM instruction that sets the 's' bit should specify an optional
12291  // "cc_out" operand in the last operand position.
12292  if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12293  assert(!NewOpc && "Optional cc_out operand required");
12294  return;
12295  }
12296  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12297  // since we already have an optional CPSR def.
12298  bool definesCPSR = false;
12299  bool deadCPSR = false;
12300  for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12301  ++i) {
12302  const MachineOperand &MO = MI.getOperand(i);
12303  if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12304  definesCPSR = true;
12305  if (MO.isDead())
12306  deadCPSR = true;
12307  MI.removeOperand(i);
12308  break;
12309  }
12310  }
12311  if (!definesCPSR) {
12312  assert(!NewOpc && "Optional cc_out operand required");
12313  return;
12314  }
12315  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12316  if (deadCPSR) {
12317  assert(!MI.getOperand(ccOutIdx).getReg() &&
12318  "expect uninitialized optional cc_out operand");
12319  // Thumb1 instructions must have the S bit even if the CPSR is dead.
12320  if (!Subtarget->isThumb1Only())
12321  return;
12322  }
12323 
12324  // If this instruction was defined with an optional CPSR def and its dag node
12325  // had a live implicit CPSR def, then activate the optional CPSR def.
12326  MachineOperand &MO = MI.getOperand(ccOutIdx);
12327  MO.setReg(ARM::CPSR);
12328  MO.setIsDef(true);
12329 }
12330 
12331 //===----------------------------------------------------------------------===//
12332 // ARM Optimization Hooks
12333 //===----------------------------------------------------------------------===//
12334 
12335 // Helper function that checks if N is a null or all ones constant.
12336 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12338 }
12339 
12340 // Return true if N is conditionally 0 or all ones.
12341 // Detects these expressions where cc is an i1 value:
12342 //
12343 // (select cc 0, y) [AllOnes=0]
12344 // (select cc y, 0) [AllOnes=0]
12345 // (zext cc) [AllOnes=0]
12346 // (sext cc) [AllOnes=0/1]
12347 // (select cc -1, y) [AllOnes=1]
12348 // (select cc y, -1) [AllOnes=1]
12349 //
12350 // Invert is set when N is the null/all ones constant when CC is false.
12351 // OtherOp is set to the alternative value of N.
// See the block comment above for the recognized expression shapes.
// On success the out-params are filled in: CC is the i1 condition, Invert is
// true when N equals the null/all-ones constant on the *false* edge, and
// OtherOp is the value N takes on the other edge. On failure the out-params
// are unspecified — callers must only read them when this returns true.
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
                                       SDValue &CC, bool &Invert,
                                       SDValue &OtherOp,
                                       SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  default: return false;
  case ISD::SELECT: {
    // (select cc, N1, N2): identity constant may be in either arm.
    CC = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    if (isZeroOrAllOnes(N1, AllOnes)) {
      Invert = false;
      OtherOp = N2;
      return true;
    }
    if (isZeroOrAllOnes(N2, AllOnes)) {
      Invert = true;
      OtherOp = N1;
      return true;
    }
    return false;
  }
  case ISD::ZERO_EXTEND:
    // (zext cc) can never be the all ones value.
    if (AllOnes)
      return false;
    [[fallthrough]];
  case ISD::SIGN_EXTEND: {
    SDLoc dl(N);
    EVT VT = N->getValueType(0);
    CC = N->getOperand(0);
    // Only i1 SETCC conditions are handled here.
    if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
      return false;
    Invert = !AllOnes;
    if (AllOnes)
      // When looking for an AllOnes constant, N is an sext, and the 'other'
      // value is 0.
      OtherOp = DAG.getConstant(0, dl, VT);
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      // When looking for a 0 constant, N can be zext or sext.
      OtherOp = DAG.getConstant(1, dl, VT);
    else
      OtherOp = DAG.getAllOnesConstant(dl, VT);
    return true;
  }
  }
}
12399 
12400 // Combine a constant select operand into its use:
12401 //
12402 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12403 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12404 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12405 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12406 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12407 //
12408 // The transform is rejected if the select doesn't have a constant operand that
12409 // is null, or all ones when AllOnes is set.
12410 //
12411 // Also recognize sext/zext from i1:
12412 //
12413 // (add (zext cc), x) -> (select cc (add x, 1), x)
12414 // (add (sext cc), x) -> (select cc (add x, -1), x)
12415 //
12416 // These transformations eventually create predicated instructions.
12417 //
12418 // @param N The node to transform.
12419 // @param Slct The N operand that is a select.
12420 // @param OtherOp The other N operand (x above).
12421 // @param DCI Context.
12422 // @param AllOnes Require the select constant to be all ones instead of null.
12423 // @returns The new node, or SDValue() on failure.
12424 static
12427  bool AllOnes = false) {
12428  SelectionDAG &DAG = DCI.DAG;
12429  EVT VT = N->getValueType(0);
12430  SDValue NonConstantVal;
12431  SDValue CCOp;
12432  bool SwapSelectOps;
12433  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12434  NonConstantVal, DAG))
12435  return SDValue();
12436 
12437  // Slct is now know to be the desired identity constant when CC is true.
12438  SDValue TrueVal = OtherOp;
12439  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12440  OtherOp, NonConstantVal);
12441  // Unless SwapSelectOps says CC should be false.
12442  if (SwapSelectOps)
12444 
12445  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12446  CCOp, TrueVal, FalseVal);
12447 }
12448 
12449 // Attempt combineSelectAndUse on each operand of a commutative operator N.
12450 static
12453  SDValue N0 = N->getOperand(0);
12454  SDValue N1 = N->getOperand(1);
12455  if (N0.getNode()->hasOneUse())
12456  if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12457  return Result;
12458  if (N1.getNode()->hasOneUse())
12459  if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12460  return Result;
12461  return SDValue();
12462 }
12463 
12464 static bool IsVUZPShuffleNode(SDNode *N) {
12465  // VUZP shuffle node.
12466  if (N->getOpcode() == ARMISD::VUZP)
12467  return true;
12468 
12469  // "VUZP" on i32 is an alias for VTRN.
12470  if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12471  return true;
12472 
12473  return false;
12474 }
12475 
12478  const ARMSubtarget *Subtarget) {
12479  // Look for ADD(VUZP.0, VUZP.1).
12480  if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12481  N0 == N1)
12482  return SDValue();
12483 
12484  // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12485  if (!N->getValueType(0).is64BitVector())
12486  return SDValue();
12487 
12488  // Generate vpadd.
12489  SelectionDAG &DAG = DCI.DAG;
12490  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12491  SDLoc dl(N);
12492  SDNode *Unzip = N0.getNode();
12493  EVT VT = N->getValueType(0);
12494 
12496  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12497  TLI.getPointerTy(DAG.getDataLayout())));
12498  Ops.push_back(Unzip->getOperand(0));
12499  Ops.push_back(Unzip->getOperand(1));
12500 
12501  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12502 }
12503 
12506  const ARMSubtarget *Subtarget) {
12507  // Check for two extended operands.
12508  if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12509  N1.getOpcode() == ISD::SIGN_EXTEND) &&
12510  !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12511  N1.getOpcode() == ISD::ZERO_EXTEND))
12512  return SDValue();
12513 
12514  SDValue N00 = N0.getOperand(0);
12515  SDValue N10 = N1.getOperand(0);
12516 
12517  // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12518  if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12519  N00 == N10)
12520  return SDValue();
12521 
12522  // We only recognize Q register paddl here; this can't be reached until
12523  // after type legalization.
12524  if (!N00.getValueType().is64BitVector() ||
12525  !N0.getValueType().is128BitVector())
12526  return SDValue();
12527 
12528  // Generate vpaddl.
12529  SelectionDAG &DAG = DCI.DAG;
12530  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12531  SDLoc dl(N);
12532  EVT VT = N->getValueType(0);
12533 
12535  // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12536  unsigned Opcode;
12537  if (N0.getOpcode() == ISD::SIGN_EXTEND)
12538  Opcode = Intrinsic::arm_neon_vpaddls;
12539  else
12540  Opcode = Intrinsic::arm_neon_vpaddlu;
12541  Ops.push_back(DAG.getConstant(Opcode, dl,
12542  TLI.getPointerTy(DAG.getDataLayout())));
12543  EVT ElemTy = N00.getValueType().getVectorElementType();
12544  unsigned NumElts = VT.getVectorNumElements();
12545  EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12546  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12547  N00.getOperand(0), N00.getOperand(1));
12548  Ops.push_back(Concat);
12549 
12550  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12551 }
12552 
12553 // FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12554 // an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12555 // much easier to match.
12556 static SDValue
12559  const ARMSubtarget *Subtarget) {
12560  // Only perform optimization if after legalize, and if NEON is available. We
12561  // also expected both operands to be BUILD_VECTORs.
12562  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12563  || N0.getOpcode() != ISD::BUILD_VECTOR
12564  || N1.getOpcode() != ISD::BUILD_VECTOR)
12565  return SDValue();
12566 
12567  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12568  EVT VT = N->getValueType(0);
12569  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12570  return SDValue();
12571 
12572  // Check that the vector operands are of the right form.
12573  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12574  // operands, where N is the size of the formed vector.
12575  // Each EXTRACT_VECTOR should have the same input vector and odd or even
12576  // index such that we have a pair wise add pattern.
12577 
12578  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12579  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12580  return SDValue();
12581  SDValue Vec = N0->getOperand(0)->getOperand(0);
12582  SDNode *V = Vec.getNode();
12583  unsigned nextIndex = 0;
12584 
12585  // For each operands to the ADD which are BUILD_VECTORs,
12586  // check to see if each of their operands are an EXTRACT_VECTOR with
12587  // the same vector and appropriate index.
12588  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12591 
12592  SDValue ExtVec0 = N0->getOperand(i);
12593  SDValue ExtVec1 = N1->getOperand(i);
12594 
12595  // First operand is the vector, verify its the same.
12596  if (V != ExtVec0->getOperand(0).getNode() ||
12597  V != ExtVec1->getOperand(0).getNode())
12598  return SDValue();
12599 
12600  // Second is the constant, verify its correct.
12601  ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12602  ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12603 
12604  // For the constant, we want to see all the even or all the odd.
12605  if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12606  || C1->getZExtValue() != nextIndex+1)
12607  return SDValue();
12608 
12609  // Increment index.
12610  nextIndex+=2;
12611  } else
12612  return SDValue();
12613  }
12614 
12615  // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12616  // we're using the entire input vector, otherwise there's a size/legality
12617  // mismatch somewhere.
12618  if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12620  return SDValue();
12621 
12622  // Create VPADDL node.
12623  SelectionDAG &DAG = DCI.DAG;
12624  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12625 
12626  SDLoc dl(N);
12627 
12628  // Build operand list.
12630  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12631  TLI.getPointerTy(DAG.getDataLayout())));
12632 
12633  // Input is the vector.
12634  Ops.push_back(Vec);
12635 
12636  // Get widened type and narrowed type.
12637  MVT widenType;
12638  unsigned numElem = VT.getVectorNumElements();
12639 
12640  EVT inputLaneType = Vec.getValueType().getVectorElementType();
12641  switch (inputLaneType.getSimpleVT().SimpleTy) {
12642  case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12643  case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12644  case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12645  default:
12646  llvm_unreachable("Invalid vector element type for padd optimization.");
12647  }
12648 
12649  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12650  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12651  return DAG.getNode(ExtOp, dl, VT, tmp);
12652 }
12653 
12655  if (V->getOpcode() == ISD::UMUL_LOHI ||
12656  V->getOpcode() == ISD::SMUL_LOHI)
12657  return V;
12658  return SDValue();
12659 }
12660 
12661 static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12663  const ARMSubtarget *Subtarget) {
12664  if (!Subtarget->hasBaseDSP())
12665  return SDValue();
12666 
12667  // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12668  // accumulates the product into a 64-bit value. The 16-bit values will
12669  // be sign extended somehow or SRA'd into 32-bit values
12670  // (addc (adde (mul 16bit, 16bit), lo), hi)
12671  SDValue Mul = AddcNode->getOperand(0);
12672  SDValue Lo = AddcNode->getOperand(1);
12673  if (Mul.getOpcode() != ISD::MUL) {
12674  Lo = AddcNode->getOperand(0);
12675  Mul = AddcNode->getOperand(1);
12676  if (Mul.getOpcode() != ISD::MUL)
12677  return SDValue();
12678  }
12679 
12680  SDValue SRA = AddeNode->getOperand(0);
12681  SDValue Hi = AddeNode->getOperand(1);
12682  if (SRA.getOpcode() != ISD::SRA) {
12683  SRA = AddeNode->getOperand(1);
12684  Hi = AddeNode->getOperand(0);
12685  if (SRA.getOpcode() != ISD::SRA)
12686  return SDValue();
12687  }
12688  if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12689  if (Const->getZExtValue() != 31)
12690  return SDValue();
12691  } else
12692  return SDValue();
12693 
12694  if (SRA.getOperand(0) != Mul)
12695  return SDValue();
12696 
12697  SelectionDAG &DAG = DCI.DAG;
12698  SDLoc dl(AddcNode);
12699  unsigned Opcode = 0;
12700  SDValue Op0;
12701  SDValue Op1;
12702 
12703  if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12704  Opcode = ARMISD::SMLALBB;
12705  Op0 = Mul.getOperand(0);
12706  Op1 = Mul.getOperand(1);
12707  } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12708  Opcode = ARMISD::SMLALBT;
12709  Op0 = Mul.getOperand(0);
12710  Op1 = Mul.getOperand(1).getOperand(0);
12711  } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12712  Opcode = ARMISD::SMLALTB;
12713  Op0 = Mul.getOperand(0).getOperand(0);
12714  Op1 = Mul.getOperand(1);
12715  } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12716  Opcode = ARMISD::SMLALTT;
12717  Op0 = Mul->getOperand(0).getOperand(0);
12718  Op1 = Mul->getOperand(1).getOperand(0);
12719  }
12720 
12721  if (!Op0 || !Op1)
12722  return SDValue();
12723 
12724  SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12725  Op0, Op1, Lo, Hi);
12726  // Replace the ADDs' nodes uses by the MLA node's values.
12727  SDValue HiMLALResult(SMLAL.getNode(), 1);
12728  SDValue LoMLALResult(SMLAL.getNode(), 0);
12729 
12730  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12731  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12732 
12733  // Return original node to notify the driver to stop replacing.
12734  SDValue resNode(AddcNode, 0);
12735  return resNode;
12736 }
12737 
12738 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12740  const ARMSubtarget *Subtarget) {
12741  // Look for multiply add opportunities.
12742  // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
12743  // each add nodes consumes a value from ISD::UMUL_LOHI and there is
12744  // a glue link from the first add to the second add.
12745  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12746  // a S/UMLAL instruction.
12747  // UMUL_LOHI
12748  // / :lo \ :hi
12749  // V \ [no multiline comment]
12750  // loAdd -> ADDC |
12751  // \ :carry /
12752  // V V
12753  // ADDE <- hiAdd
12754  //
12755  // In the special case where only the higher part of a signed result is used
12756  // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12757  // a constant with the exact value of 0x80000000, we recognize we are dealing
12758  // with a "rounded multiply and add" (or subtract) and transform it into
12759  // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.
12760 
12761  assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12762  AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12763  "Expect an ADDE or SUBE");
12764 
12765  assert(AddeSubeNode->getNumOperands() == 3 &&
12766  AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12767  "ADDE node has the wrong inputs");
12768 
12769  // Check that we are chained to the right ADDC or SUBC node.
12770  SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12771  if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12772  AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12773  (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12774  AddcSubcNode->getOpcode() != ARMISD::SUBC))
12775  return SDValue();
12776 
12777  SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12778  SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12779 
12780  // Check if the two operands are from the same mul_lohi node.
12781  if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12782  return SDValue();
12783 
12784  assert(AddcSubcNode->getNumValues() == 2 &&
12785  AddcSubcNode->getValueType(0) == MVT::i32 &&
12786  "Expect ADDC with two result values. First: i32");
12787 
12788  // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12789  // maybe a SMLAL which multiplies two 16-bit values.
12790  if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12791  AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12792  AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12793  AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12794  AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12795  return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12796 
12797  // Check for the triangle shape.
12798  SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12799  SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12800 
12801  // Make sure that the ADDE/SUBE operands are not coming from the same node.
12802  if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12803  return SDValue();
12804 
12805  // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12806  bool IsLeftOperandMUL = false;
12807  SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12808  if (MULOp == SDValue())
12809  MULOp = findMUL_LOHI(AddeSubeOp1);
12810  else
12811  IsLeftOperandMUL = true;
12812  if (MULOp == SDValue())
12813  return SDValue();
12814 
12815  // Figure out the right opcode.
12816  unsigned Opc = MULOp->getOpcode();
12817  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12818 
12819  // Figure out the high and low input values to the MLAL node.
12820  SDValue *HiAddSub = nullptr;
12821  SDValue *LoMul = nullptr;
12822  SDValue *LowAddSub = nullptr;
12823 
12824  // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
12825  if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12826  return SDValue();
12827 
12828  if (IsLeftOperandMUL)
12829  HiAddSub = &AddeSubeOp1;
12830  else
12831  HiAddSub = &AddeSubeOp0;
12832 
12833  // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
12834  // whose low result is fed to the ADDC/SUBC we are checking.
12835 
12836  if (AddcSubcOp0 == MULOp.getValue(0)) {
12837  LoMul = &AddcSubcOp0;
12838  LowAddSub = &AddcSubcOp1;
12839  }
12840  if (AddcSubcOp1 == MULOp.getValue(0)) {
12841  LoMul = &AddcSubcOp1;
12842  LowAddSub = &AddcSubcOp0;
12843  }
12844 
12845  if (!LoMul)
12846  return SDValue();
12847 
12848  // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
12849  // the replacement below will create a cycle.
12850  if (AddcSubcNode == HiAddSub->getNode() ||
12851  AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
12852  return SDValue();
12853 
12854  // Create the merged node.
12855  SelectionDAG &DAG = DCI.DAG;
12856 
12857  // Start building operand list.
12859  Ops.push_back(LoMul->getOperand(0));
12860  Ops.push_back(LoMul->getOperand(1));
12861 
12862  // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
12863  // the case, we must be doing signed multiplication and only use the higher
12864  // part of the result of the MLAL, furthermore the LowAddSub must be a constant
12865  // addition or subtraction with the value of 0x800000.
12866  if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
12867  FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
12868  LowAddSub->getNode()->getOpcode() == ISD::Constant &&
12869  static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
12870  0x80000000) {
12871  Ops.push_back(*HiAddSub);
12872  if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
12873  FinalOpc = ARMISD::SMMLSR;
12874  } else {
12875  FinalOpc = ARMISD::SMMLAR;
12876  }
12877  SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
12878  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
12879 
12880  return SDValue(AddeSubeNode, 0);
12881  } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
12882  // SMMLS is generated during instruction selection and the rest of this
12883  // function can not handle the case where AddcSubcNode is a SUBC.
12884  return SDValue();
12885 
12886  // Finish building the operand list for {U/S}MLAL
12887  Ops.push_back(*LowAddSub);
12888  Ops.push_back(*HiAddSub);
12889 
12890  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
12891  DAG.getVTList(MVT::i32, MVT::i32), Ops);
12892 
12893  // Replace the ADDs' nodes uses by the MLA node's values.
12894  SDValue HiMLALResult(MLALNode.getNode(), 1);
12895  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
12896 
12897  SDValue LoMLALResult(MLALNode.getNode(), 0);
12898  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
12899 
12900  // Return original node to notify the driver to stop replacing.
12901  return SDValue(AddeSubeNode, 0);
12902 }
12903 
12906  const ARMSubtarget *Subtarget) {
12907  // UMAAL is similar to UMLAL except that it adds two unsigned values.
12908  // While trying to combine for the other MLAL nodes, first search for the
12909  // chance to use UMAAL. Check if Addc uses a node which has already
12910  // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
12911  // as the addend, and it's handled in PerformUMLALCombine.
12912 
12913  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12914  return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12915 
12916  // Check that we have a glued ADDC node.
12917  SDNode* AddcNode = AddeNode->getOperand(2).getNode();
12918  if (AddcNode->getOpcode() != ARMISD::ADDC)
12919  return SDValue();
12920 
12921  // Find the converted UMAAL or quit if it doesn't exist.
12922  SDNode *UmlalNode = nullptr;
12923  SDValue AddHi;
12924  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
12925  UmlalNode = AddcNode->getOperand(0).getNode();
12926  AddHi = AddcNode->getOperand(1);
12927  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
12928  UmlalNode = AddcNode->getOperand(1).getNode();
12929  AddHi = AddcNode->getOperand(0);
12930  } else {
12931  return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12932  }
12933 
12934  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
12935  // the ADDC as well as Zero.
12936  if (!isNullConstant(UmlalNode->getOperand(3)))
12937  return SDValue();
12938 
12939  if ((isNullConstant(AddeNode->getOperand(0)) &&
12940  AddeNode->getOperand(1).getNode() == UmlalNode) ||
12941  (AddeNode->getOperand(0).getNode() == UmlalNode &&
12942  isNullConstant(AddeNode->getOperand(1)))) {
12943  SelectionDAG &DAG = DCI.DAG;
12944  SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
12945  UmlalNode->getOperand(2), AddHi };
12946  SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
12947  DAG.getVTList(MVT::i32, MVT::i32), Ops);
12948 
12949  // Replace the ADDs' nodes uses by the UMAAL node's values.
12950  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
12951  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
12952 
12953  // Return original node to notify the driver to stop replacing.
12954  return SDValue(AddeNode, 0);
12955  }
12956  return SDValue();
12957 }
12958 
12960  const ARMSubtarget *Subtarget) {
12961  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12962  return SDValue();
12963 
12964  // Check that we have a pair of ADDC and ADDE as operands.
12965  // Both addends of the ADDE must be zero.
12966  SDNode* AddcNode = N->getOperand(2).getNode();
12967  SDNode* AddeNode = N->getOperand(3).getNode();
12968  if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
12969  (AddeNode->getOpcode() == ARMISD::ADDE) &&
12970  isNullConstant(AddeNode->getOperand(0)) &&
12971  isNullConstant(AddeNode->getOperand(1)) &&
12972  (AddeNode->getOperand(2).getNode() == AddcNode))
12973  return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
12974  DAG.getVTList(MVT::i32, MVT::i32),
12975  {N->getOperand(0), N->getOperand(1),
12976  AddcNode->getOperand(0), AddcNode->getOperand(1)});
12977  else
12978  return SDValue();
12979 }
12980 
12983  const ARMSubtarget *Subtarget) {
12984  SelectionDAG &DAG(DCI.DAG);
12985 
12986  if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
12987  // (SUBC (ADDE 0, 0, C), 1) -> C
12988  SDValue LHS = N->getOperand(0);
12989  SDValue RHS = N->getOperand(1);
12990  if (LHS->getOpcode() == ARMISD::ADDE &&
12991  isNullConstant(LHS->getOperand(0)) &&
12992  isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
12993  return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
12994  }
12995  }
12996 
12997  if (Subtarget->isThumb1Only()) {
12998  SDValue RHS = N->getOperand(1);
12999  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13000  int32_t imm = C->getSExtValue();
13001  if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13002  SDLoc DL(N);
13003  RHS = DAG.getConstant(-imm, DL, MVT::i32);
13004  unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13005  : ARMISD::ADDC;
13006  return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13007  }
13008  }
13009  }
13010 
13011  return SDValue();
13012 }
13013 
// Combine for ARMISD::ADDE / ARMISD::SUBE nodes.
// NOTE(review): the first line of the signature is missing from this capture
// (presumably "static SDValue PerformAddeSubeCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI," — confirm against upstream).
13016  const ARMSubtarget *Subtarget) {
// On Thumb1, rewrite an ADDE/SUBE with a negative constant RHS using the
// opposite with-carry opcode and the bitwise-not of the constant.
13017  if (Subtarget->isThumb1Only()) {
13018  SelectionDAG &DAG = DCI.DAG;
13019  SDValue RHS = N->getOperand(1);
13020  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13021  int64_t imm = C->getSExtValue();
13022  if (imm < 0) {
13023  SDLoc DL(N);
13024 
13025  // The with-carry-in form matches bitwise not instead of the negation.
13026  // Effectively, the inverse interpretation of the carry flag already
13027  // accounts for part of the negation.
13028  RHS = DAG.getConstant(~imm, DL, MVT::i32);
13029 
13030  unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13031  : ARMISD::ADDE;
// The carry-in operand (operand 2) is preserved unchanged.
13032  return DAG.getNode(Opcode, DL, N->getVTList(),
13033  N->getOperand(0), RHS, N->getOperand(2));
13034  }
13035  }
// Other targets: try folding ADDE of an SMUL_LOHI into a 64-bit
// multiply-accumulate via AddCombineTo64bitMLAL.
13036  } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13037  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13038  }
13039  return SDValue();
13040 }
13041 
// Fold select(setcc(x, ...), x, VECREDUCE_[US](MIN|MAX)(v)) patterns into MVE
// cross-vector reductions (ARMISD::VMINVu/VMINVs/VMAXVu/VMAXVs), which take a
// scalar starting value plus a vector.  MVE only.
// NOTE(review): this capture is missing the signature line(s) before 13044 and
// interior lines 13080/13086/13092/13098 (the statement guarded by each
// "if (CC == ...)" — presumably std::swap(TrueVal, FalseVal)), 13109 (a second
// swap inside the switch) and 13116 (the v4i32 half of the type check).
// Confirm against upstream before relying on the exact logic here.
13044  const ARMSubtarget *Subtarget) {
13045  if (!Subtarget->hasMVEIntegerOps())
13046  return SDValue();
13047 
13048  SDLoc dl(N);
13049  SDValue SetCC;
13050  SDValue LHS;
13051  SDValue RHS;
13052  ISD::CondCode CC;
13053  SDValue TrueVal;
13054  SDValue FalseVal;
13055 
// Accept either SELECT(SETCC(...), t, f) or SELECT_CC and extract the
// compared values, condition code and the two select arms uniformly.
13056  if (N->getOpcode() == ISD::SELECT &&
13057  N->getOperand(0)->getOpcode() == ISD::SETCC) {
13058  SetCC = N->getOperand(0);
13059  LHS = SetCC->getOperand(0);
13060  RHS = SetCC->getOperand(1);
13061  CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13062  TrueVal = N->getOperand(1);
13063  FalseVal = N->getOperand(2);
13064  } else if (N->getOpcode() == ISD::SELECT_CC) {
13065  LHS = N->getOperand(0);
13066  RHS = N->getOperand(1);
13067  CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13068  TrueVal = N->getOperand(2);
13069  FalseVal = N->getOperand(3);
13070  } else {
13071  return SDValue();
13072  }
13073 
// Pick the target reduction opcode from which arm carries the VECREDUCE and
// whether the condition code matches (signedness of CC must agree with the
// signedness of the reduction).
13074  unsigned int Opcode = 0;
13075  if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13076  FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13077  (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13078  Opcode = ARMISD::VMINVu;
13079  if (CC == ISD::SETUGT)
13081  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13082  FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13083  (CC == ISD::SETLT || CC == ISD::SETGT)) {
13084  Opcode = ARMISD::VMINVs;
13085  if (CC == ISD::SETGT)
13087  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13088  FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13089  (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13090  Opcode = ARMISD::VMAXVu;
13091  if (CC == ISD::SETULT)
13093  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13094  FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13095  (CC == ISD::SETGT || CC == ISD::SETLT)) {
13096  Opcode = ARMISD::VMAXVs;
13097  if (CC == ISD::SETLT)
13099  } else
13100  return SDValue();
13101 
13102  // Normalise to the right hand side being the vector reduction
13103  switch (TrueVal->getOpcode()) {
13104  case ISD::VECREDUCE_UMIN:
13105  case ISD::VECREDUCE_SMIN:
13106  case ISD::VECREDUCE_UMAX:
13107  case ISD::VECREDUCE_SMAX:
13108  std::swap(LHS, RHS);
13110  break;
13111  }
13112 
13113  EVT VectorType = FalseVal->getOperand(0).getValueType();
13114 
// Only 128-bit MVE vector types are handled (v16i8/v8i16; the missing line
// 13116 presumably also allowed v4i32 — confirm against upstream).
13115  if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13117  return SDValue();
13118 
13119  EVT VectorScalarType = VectorType.getVectorElementType();
13120 
13121  // The values being selected must also be the ones being compared
13122  if (TrueVal != LHS || FalseVal != RHS)
13123  return SDValue();
13124 
13125  EVT LeftType = LHS->getValueType(0);
13126  EVT RightType = RHS->getValueType(0);
13127 
13128  // The types must match the reduced type too
13129  if (LeftType != VectorScalarType || RightType != VectorScalarType)
13130  return SDValue();
13131 
13132  // Legalise the scalar to an i32
13133  if (VectorScalarType != MVT::i32)
13134  LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13135 
13136  // Generate the reduction as an i32 for legalisation purposes
13137  auto Reduction =
13138  DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13139 
13140  // The result isn't actually an i32 so truncate it back to its original type
13141  if (VectorScalarType != MVT::i32)
13142  Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13143 
13144  return Reduction;
13145 }
13146 
13147 // A special combine for the vqdmulh family of instructions. This is one of the
13148 // potential set of patterns that could patch this instruction. The base pattern
13149 // you would expect to be min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13150 // This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13151 // which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as
13152 // the max is unnecessary.
// NOTE(review): the signature line (13153, presumably
// "static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {") is
// missing from this capture, as are lines 13201 (the definition of N1 — the
// SRA amount as a constant/splat) and 13229 (the construction of ExtVecVT).
// Confirm against upstream before relying on the exact logic here.
13154  EVT VT = N->getValueType(0);
13155  SDValue Shft;
13156  ConstantSDNode *Clamp;
13157 
13158  if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13159  return SDValue();
13160 
// The SMIN clamp may appear either directly, or — for i64 elements — as a
// vselect/setcc pair implementing the same minimum.
13161  if (N->getOpcode() == ISD::SMIN) {
13162  Shft = N->getOperand(0);
13163  Clamp = isConstOrConstSplat(N->getOperand(1));
13164  } else if (N->getOpcode() == ISD::VSELECT) {
13165  // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13166  SDValue Cmp = N->getOperand(0);
13167  if (Cmp.getOpcode() != ISD::SETCC ||
13168  cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13169  Cmp.getOperand(0) != N->getOperand(1) ||
13170  Cmp.getOperand(1) != N->getOperand(2))
13171  return SDValue();
13172  Shft = N->getOperand(1);
13173  Clamp = isConstOrConstSplat(N->getOperand(2));
13174  } else
13175  return SDValue();
13176 
13177  if (!Clamp)
13178  return SDValue();
13179 
// The clamp constant encodes the element type being saturated to:
// 0x7f -> i8 (shift 7), 0x7fff -> i16 (shift 15), 0x7fffffff -> i32 (shift 31).
13180  MVT ScalarType;
13181  int ShftAmt = 0;
13182  switch (Clamp->getSExtValue()) {
13183  case (1 << 7) - 1:
13184  ScalarType = MVT::i8;
13185  ShftAmt = 7;
13186  break;
13187  case (1 << 15) - 1:
13188  ScalarType = MVT::i16;
13189  ShftAmt = 15;
13190  break;
13191  case (1ULL << 31) - 1:
13192  ScalarType = MVT::i32;
13193  ShftAmt = 31;
13194  break;
13195  default:
13196  return SDValue();
13197  }
13198 
// The shifted value must be an arithmetic shift right by exactly ShftAmt.
13199  if (Shft.getOpcode() != ISD::SRA)
13200  return SDValue();
13202  if (!N1 || N1->getSExtValue() != ShftAmt)
13203  return SDValue();
13204 
// Underneath the shift must be a widening multiply of two sign-extends of
// the same (power-of-two, multi-element) vector type matching ScalarType.
13205  SDValue Mul = Shft.getOperand(0);
13206  if (Mul.getOpcode() != ISD::MUL)
13207  return SDValue();
13208 
13209  SDValue Ext0 = Mul.getOperand(0);
13210  SDValue Ext1 = Mul.getOperand(1);
13211  if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13212  Ext1.getOpcode() != ISD::SIGN_EXTEND)
13213  return SDValue();
13214  EVT VecVT = Ext0.getOperand(0).getValueType();
13215  if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13216  return SDValue();
13217  if (Ext1.getOperand(0).getValueType() != VecVT ||
13218  VecVT.getScalarType() != ScalarType ||
13219  VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13220  return SDValue();
13221 
13222  SDLoc DL(Mul);
13223  unsigned LegalLanes = 128 / (ShftAmt + 1);
13224  EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13225  // For types smaller than legal vectors extend to be legal and only use needed
13226  // lanes.
13227  if (VecVT.getSizeInBits() < 128) {
13228  EVT ExtVecVT =
13230  VecVT.getVectorNumElements());
13231  SDValue Inp0 =
13232  DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13233  SDValue Inp1 =
13234  DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
// Reinterpret the widened inputs as a legal 128-bit vector, perform the
// VQDMULH there, then cast/truncate back to the original narrow type.
13235  Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13236  Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13237  SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13238  SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13239  Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13240  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13241  }
13242 
13243  // For larger types, split into legal sized chunks.
13244  assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13245  unsigned NumParts = VecVT.getSizeInBits() / 128;
13246  SmallVector<SDValue> Parts;
13247  for (unsigned I = 0; I < NumParts; ++I) {
13248  SDValue Inp0 =
13249  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13250  DAG.getVectorIdxConstant(I * LegalLanes, DL));
13251  SDValue Inp1 =
13252  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13253  DAG.getVectorIdxConstant(I * LegalLanes, DL));
13254  SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13255  Parts.push_back(VQDMULH);
13256  }
// Reassemble the per-chunk results and sign-extend to the result type.
13257  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13258  DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13259 }
13260 
// Combine for ISD::VSELECT on MVE: first try the VQDMULH pattern, then fold
// vselect(xor(cond, 1), lhs, rhs) -> vselect(cond, rhs, lhs).
// NOTE(review): the signature line(s) before 13263 are missing from this
// capture (presumably "static SDValue PerformVSELECTCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,").
13263  const ARMSubtarget *Subtarget) {
13264  if (!Subtarget->hasMVEIntegerOps())
13265  return SDValue();
13266 
13267  if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13268  return V;
13269 
13270  // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13271  //
13272  // We need to re-implement this optimization here as the implementation in the
13273  // Target-Independent DAGCombiner does not handle the kind of constant we make
13274  // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13275  // good reason, allowing truncation there would break other targets).
13276  //
13277  // Currently, this is only done for MVE, as it's the only target that benefits
13278  // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13279  if (N->getOperand(0).getOpcode() != ISD::XOR)
13280  return SDValue();
13281  SDValue XOR = N->getOperand(0);
13282 
13283  // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13284  // It is important to check with truncation allowed as the BUILD_VECTORs we
13285  // generate in those situations will truncate their operands.
13286  ConstantSDNode *Const =
13287  isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13288  /*AllowTruncation*/ true);
13289  if (!Const || !Const->isOne())
13290  return SDValue();
13291 
13292  // Rewrite into vselect(cond, rhs, lhs).
13293  SDValue Cond = XOR->getOperand(0);
13294  SDValue LHS = N->getOperand(1);
13295  SDValue RHS = N->getOperand(2);
13296  EVT Type = N->getValueType(0);
13297  return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13298 }
13299 
13300 // Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
// NOTE(review): the signature lines (13301-13302) are missing from this
// capture, as is line 13310 — the remainder of the early-out condition after
// "!Subtarget->hasMVEIntegerOps() ||".  Confirm against upstream.
13303  const ARMSubtarget *Subtarget) {
13304  SDValue Op0 = N->getOperand(0);
13305  SDValue Op1 = N->getOperand(1);
13306  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13307  EVT VT = N->getValueType(0);
13308 
13309  if (!Subtarget->hasMVEIntegerOps() ||
13311  return SDValue();
13312 
// Canonicalise UGE with swapped operands into the ULT form handled below.
13313  if (CC == ISD::SETUGE) {
13314  std::swap(Op0, Op1);
13315  CC = ISD::SETULT;
13316  }
13317 
13318  if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13319  Op0.getOpcode() != ISD::BUILD_VECTOR)
13320  return SDValue();
13321 
13322  // Check first operand is BuildVector of 0,1,2,...
13323  for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13324  if (!Op0.getOperand(I).isUndef() &&
13325  !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13326  Op0.getConstantOperandVal(I) == I))
13327  return SDValue();
13328  }
13329 
13330  // The second is a Splat of Op1S
13331  SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13332  if (!Op1S)
13333  return SDValue();
13334 
// Pick the VCTP intrinsic whose lane count matches the predicate width.
13335  unsigned Opc;
13336  switch (VT.getVectorNumElements()) {
13337  case 2:
13338  Opc = Intrinsic::arm_mve_vctp64;
13339  break;
13340  case 4:
13341  Opc = Intrinsic::arm_mve_vctp32;
13342  break;
13343  case 8:
13344  Opc = Intrinsic::arm_mve_vctp16;
13345  break;
13346  case 16:
13347  Opc = Intrinsic::arm_mve_vctp8;
13348  break;
13349  default:
13350  return SDValue();
13351  }
13352 
13353  SDLoc DL(N);
13354  return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13355  DCI.DAG.getConstant(Opc, DL, MVT::i32),
13356  DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13357 }
13358 
// Expand ISD::ABS via the generic TargetLowering::expandABS helper when the
// operation is not legal for this type; if it is legal, leave it alone.
// NOTE(review): the signature line(s) before 13361 are missing from this
// capture (presumably "static SDValue PerformABSCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,").
13361  const ARMSubtarget *Subtarget) {
13362  SelectionDAG &DAG = DCI.DAG;
13363  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13364 
13365  if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
13366  return SDValue();
13367 
13368  return TLI.expandABS(N, DAG);
13369 }
13370 
13371 /// PerformADDECombine - Target-specific dag combine transform from
13372 /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13373 /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
// NOTE(review): the signature lines (13374-13375) are missing from this
// capture (presumably "static SDValue PerformADDECombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,").
13376  const ARMSubtarget *Subtarget) {
13377  // Only ARM and Thumb2 support UMLAL/SMLAL.
13378  if (Subtarget->isThumb1Only())
13379  return PerformAddeSubeCombine(N, DCI, Subtarget);
13380 
13381  // Only perform the checks after legalize when the pattern is available.
13382  if (DCI.isBeforeLegalize()) return SDValue();
13383 
13384  return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13385 }
13386 
13387 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13388 /// operands N0 and N1. This is a helper for PerformADDCombine that is
13389 /// called with the default operands, and if that fails, with commuted
13390 /// operands.
// NOTE(review): the signature lines (13391-13392) are missing from this
// capture (presumably "static SDValue PerformADDCombineWithOperands(SDNode *N,
// SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI,").
13393  const ARMSubtarget *Subtarget){
13394  // Attempt to create vpadd for this add.
13395  if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13396  return Result;
13397 
13398  // Attempt to create vpaddl for this add.
13399  if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13400  return Result;
13401  if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13402  Subtarget))
13403  return Result;
13404 
13405  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
// Only when the select has a single use, so the original is removed.
13406  if (N0.getNode()->hasOneUse())
13407  if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13408  return Result;
13409  return SDValue();
13410 }
13411 
// Distribute i32 adds over vector reductions (VECREDUCE_ADD / VADDV / VMLAV)
// so that more of them can become accumulating vaddva-style instructions, and
// order reduction-of-load operands by ascending load offset to help
// prefetching.
// NOTE(review): the signature line (13412, presumably
// "static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {")
// is missing from this capture.
13413  EVT VT = N->getValueType(0);
13414  SDValue N0 = N->getOperand(0);
13415  SDValue N1 = N->getOperand(1);
13416  SDLoc dl(N);
13417 
// True for any node that is a (possibly multiply-accumulating) cross-vector
// add reduction.
13418  auto IsVecReduce = [](SDValue Op) {
13419  switch (Op.getOpcode()) {
13420  case ISD::VECREDUCE_ADD:
13421  case ARMISD::VADDVs:
13422  case ARMISD::VADDVu:
13423  case ARMISD::VMLAVs:
13424  case ARMISD::VMLAVu:
13425  return true;
13426  }
13427  return false;
13428  };
13429 
13430  auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13431  // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13432  // add(add(X, vecreduce(Y)), vecreduce(Z))
13433  // to make better use of vaddva style instructions.
13434  if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13435  IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13436  !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13437  SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13438  return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13439  }
13440  // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13441  // add(add(add(A, C), reduce(B)), reduce(D))
13442  if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13443  N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
// Locate which operand of each add is the reduction (either side may be).
13444  unsigned N0RedOp = 0;
13445  if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13446  N0RedOp = 1;
13447  if (!IsVecReduce(N0.getOperand(N0RedOp)))
13448  return SDValue();
13449  }
13450 
13451  unsigned N1RedOp = 0;
13452  if (!IsVecReduce(N1.getOperand(N1RedOp)))
13453  N1RedOp = 1;
13454  if (!IsVecReduce(N1.getOperand(N1RedOp)))
13455  return SDValue();
13456 
13457  SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13458  N1.getOperand(1 - N1RedOp));
13459  SDValue Add1 =
13460  DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13461  return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13462  }
13463  return SDValue();
13464  };
// Try both operand orders for the distribute pattern.
13465  if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13466  return R;
13467  if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13468  return R;
13469 
13470  // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13471  // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13472  // by ascending load offsets. This can help cores prefetch if the order of
13473  // loads is more predictable.
13474  auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13475  // Check if two reductions are known to load data where one is before/after
13476  // another. Return negative if N0 loads data before N1, positive if N1 is
13477  // before N0 and 0 otherwise if nothing is known.
13478  auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13479  // Look through to the first operand of a MUL, for the VMLA case.
13480  // Currently only looks at the first operand, in the hope they are equal.
13481  if (N0.getOpcode() == ISD::MUL)
13482  N0 = N0.getOperand(0);
13483  if (N1.getOpcode() == ISD::MUL)
13484  N1 = N1.getOperand(0);
13485 
13486  // Return true if the two operands are loads to the same object and the
13487  // offset of the first is known to be less than the offset of the second.
13488  LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13489  LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13490  if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13491  !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13492  Load1->isIndexed())
13493  return 0;
13494 
13495  auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13496  auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13497 
13498  if (!BaseLocDecomp0.getBase() ||
13499  BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13500  !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13501  return 0;
13502  if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13503  return -1;
13504  if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13505  return 1;
13506  return 0;
13507  };
13508 
// Split add(X, reduce) so that X is the non-reduction operand; when both
// operands are reductions, keep the one whose load comes later as N0.
13509  SDValue X;
13510  if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13511  if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13512  int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13513  N0.getOperand(1).getOperand(0));
13514  if (IsBefore < 0) {
13515  X = N0.getOperand(0);
13516  N0 = N0.getOperand(1);
13517  } else if (IsBefore > 0) {
13518  X = N0.getOperand(1);
13519  N0 = N0.getOperand(0);
13520  } else
13521  return SDValue();
13522  } else if (IsVecReduce(N0.getOperand(0))) {
13523  X = N0.getOperand(1);
13524  N0 = N0.getOperand(0);
13525  } else if (IsVecReduce(N0.getOperand(1))) {
13526  X = N0.getOperand(0);
13527  N0 = N0.getOperand(1);
13528  } else
13529  return SDValue();
13530  } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13531  IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13532  // Note this is backward to how you would expect. We create
13533  // add(reduce(load + 16), reduce(load + 0)) so that the
13534  // add(reduce(load+16), X) is combined into VADDVA(X, load+16)), leaving
13535  // the X as VADDV(load + 0)
13536  return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13537  } else
13538  return SDValue();
13539 
13540  if (!IsVecReduce(N0) || !IsVecReduce(N1))
13541  return SDValue();
13542 
13543  if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13544  return SDValue();
13545 
13546  // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13547  SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13548  return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13549  };
13550  if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13551  return R;
13552  if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13553  return R;
13554  return SDValue();
13555 }
13556 
// Fold i64 add of a 64-bit MVE reduction (built as a BUILD_PAIR of a
// VADDLV/VMLALV's two i32 results) into the accumulating VADDLVA/VMLALVA
// form, trying all sign/zero/predicated variants and both operand orders.
// NOTE(review): the signature line (13557, presumably
// "static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,") and line
// 13598 (the declaration of Ops, presumably "SmallVector<SDValue, 4> Ops;")
// are missing from this capture.
13558  const ARMSubtarget *Subtarget) {
13559  if (!Subtarget->hasMVEIntegerOps())
13560  return SDValue();
13561 
13562  if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13563  return R;
13564 
13565  EVT VT = N->getValueType(0);
13566  SDValue N0 = N->getOperand(0);
13567  SDValue N1 = N->getOperand(1);
13568  SDLoc dl(N);
13569 
13570  if (VT != MVT::i64)
13571  return SDValue();
13572 
13573  // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
13574  // will look like:
13575  // t1: i32,i32 = ARMISD::VADDLVs x
13576  // t2: i64 = build_pair t1, t1:1
13577  // t3: i64 = add t2, y
13578  // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13579  // the add to be simplified seperately.
13580  // We also need to check for sext / zext and commutitive adds.
13581  auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13582  SDValue NB) {
13583  if (NB->getOpcode() != ISD::BUILD_PAIR)
13584  return SDValue();
13585  SDValue VecRed = NB->getOperand(0);
// The BUILD_PAIR must combine results 0 and 1 of the same reduction node.
13586  if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13587  VecRed.getResNo() != 0 ||
13588  NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13589  return SDValue();
13590 
13591  if (VecRed->getOpcode() == OpcodeA) {
13592  // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13593  SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13594  VecRed.getOperand(0), VecRed.getOperand(1));
13595  NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13596  }
13597 
// Build the accumulating node: the accumulator (NA split into two i32
// halves) followed by the reduction's remaining operands.
13599  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
13600  DAG.getConstant(0, dl, MVT::i32)));
13601  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
13602  DAG.getConstant(1, dl, MVT::i32)));
13603  unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13604  for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13605  Ops.push_back(VecRed->getOperand(I));
13606  SDValue Red =
13607  DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13608  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13609  SDValue(Red.getNode(), 1));
13610  };
13611 
// Try each reduction flavour (signed/unsigned, plain/predicated,
// VADDLV/VMLALV) with both operand orders.
13612  if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13613  return M;
13614  if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13615  return M;
13616  if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13617  return M;
13618  if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13619  return M;
13620  if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13621  return M;
13622  if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13623  return M;
13624  if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13625  return M;
13626  if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13627  return M;
13628  if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13629  return M;
13630  if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13631  return M;
13632  if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13633  return M;
13634  if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13635  return M;
13636  if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13637  return M;
13638  if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13639  return M;
13640  if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13641  return M;
13642  if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13643  return M;
13644  return SDValue();
13645 }
13646 
// Decide whether commuting a binop with a shift is profitable.
// NOTE(review): line 13648 — the function name line, presumably
// "ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N," — is
// missing from this capture.
13647 bool
13649  CombineLevel Level) const {
13650  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13651  N->getOpcode() == ISD::SRL) &&
13652  "Expected shift op");
13653 
13654  if (Level == BeforeLegalizeTypes)
13655  return true;
13656 
13657  if (N->getOpcode() != ISD::SHL)
13658  return true;
13659 
13660  if (Subtarget->isThumb1Only()) {
13661  // Avoid making expensive immediates by commuting shifts. (This logic
13662  // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13663  // for free.)
13664  if (N->getOpcode() != ISD::SHL)
13665  return true;
13666  SDValue N1 = N->getOperand(0);
13667  if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13668  N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13669  return true;
// Small constants (|c| < 256) stay cheap after shifting; allow commuting.
13670  if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13671  if (Const->getAPIntValue().ult(256))
13672  return false;
13673  if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13674  Const->getAPIntValue().sgt(-256))
13675  return false;
13676  }
13677  return true;
13678  }
13679 
13680  // Turn off commute-with-shift transform after legalization, so it doesn't
13681  // conflict with PerformSHLSimplify. (We could try to detect when
13682  // PerformSHLSimplify would trigger more precisely, but it isn't
13683  // really necessary.)
13684  return false;
13685 }
13686 
// Decide whether an XOR of a shift should be commuted: only when the entire
// NOT mask is a shifted mask lining up with the shift amount.
// NOTE(review): line 13687 — the function name line, presumably
// "bool ARMTargetLowering::isDesirableToCommuteXorWithShift(" — is missing
// from this capture.
13688  const SDNode *N) const {
13689  assert(N->getOpcode() == ISD::XOR &&
13690  (N->getOperand(0).getOpcode() == ISD::SHL ||
13691  N->getOperand(0).getOpcode() == ISD::SRL) &&
13692  "Expected XOR(SHIFT) pattern");
13693 
13694  // Only commute if the entire NOT mask is a hidden shifted mask.
13695  auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13696  auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13697  if (XorC && ShiftC) {
13698  unsigned MaskIdx, MaskLen;
13699  if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13700  unsigned ShiftAmt = ShiftC->getZExtValue();
13701  unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
// SHL: the mask must start at the shift amount and run to the top bit;
// SRL: the mask must start at bit 0 and cover the unshifted-out bits.
13702  if (N->getOperand(0).getOpcode() == ISD::SHL)
13703  return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13704  return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13705  }
13706  }
13707 
13708  return false;
13709 }
13710 
// Decide whether a shl/srl (or srl/shl) pair should be folded into a mask:
// always outside Thumb1; on Thumb1 only before type legalization.
// NOTE(review): line 13711 — the function name line, presumably
// "bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(" — is missing
// from this capture.
13712  const SDNode *N, CombineLevel Level) const {
13713  assert(((N->getOpcode() == ISD::SHL &&
13714  N->getOperand(0).getOpcode() == ISD::SRL) ||
13715  (N->getOpcode() == ISD::SRL &&
13716  N->getOperand(0).getOpcode() == ISD::SHL)) &&
13717  "Expected shift-shift mask");
13718 
13719  if (!Subtarget->isThumb1Only())
13720  return true;
13721 
13722  if (Level == BeforeLegalizeTypes)
13723  return true;
13724 
13725  return false;
13726 }
13727 
// Target preference predicate over an EVT: without NEON, prefer (true) except
// that Thumb1 only does so for scalar sizes <= 32; with NEON, only for scalar
// integers.
// NOTE(review): line 13728 — the signature line — is missing from this
// capture; from the body this looks like
// "bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {" — confirm
// against upstream.
13729  if (!Subtarget->hasNEON()) {
13730  if (Subtarget->isThumb1Only())
13731  return VT.getScalarSizeInBits() <= 32;
13732  return true;
13733  }
13734  return VT.isScalarInteger();
13735 }
13736 
// Whether an fp-to-int operation may be converted to a saturating form:
// requires the integer operation to be legal/custom and FP support for the
// source type (VFP2 for f16/f32, FP64 for f64, MVE-FP for the vector types).
// NOTE(review): line 13737 — the start of the signature, presumably
// "bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT," — is
// missing from this capture.
13738  EVT VT) const {
13739  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13740  return false;
13741 
13742  switch (FPVT.getSimpleVT().SimpleTy) {
13743  case MVT::f16:
13744  return Subtarget->hasVFP2Base();
13745  case MVT::f32:
13746  return Subtarget->hasVFP2Base();
13747  case MVT::f64:
13748  return Subtarget->hasFP64();
13749  case MVT::v4f32:
13750  case MVT::v8f16:
13751  return Subtarget->hasMVEFloatOps();
13752  default:
13753  return false;
13754  }
13755 }
13756 
// Unfold (binop (shl x, c2), c1<<c2) back to (shl (binop x, c1), c2) when the
// shifted-up constant would need a mov-immediate but the unshifted constants
// are encodable, and every user can absorb the shift into its own operand.
// NOTE(review): the signature lines (13757-13758, presumably
// "static SDValue PerformSHLSimplify(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,") are missing from this capture.
13759  const ARMSubtarget *ST) {
13760  // Allow the generic combiner to identify potential bswaps.
13761  if (DCI.isBeforeLegalize())
13762  return SDValue();
13763 
13764  // DAG combiner will fold:
13765  // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13766  // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2
13767  // Other code patterns that can be also be modified have the following form:
13768  // b + ((a << 1) | 510)
13769  // b + ((a << 1) & 510)
13770  // b + ((a << 1) ^ 510)
13771  // b + ((a << 1) + 510)
13772 
13773  // Many instructions can perform the shift for free, but it requires both
13774  // the operands to be registers. If c1 << c2 is too large, a mov immediate
13775  // instruction will needed. So, unfold back to the original pattern if:
13776  // - if c1 and c2 are small enough that they don't require mov imms.
13777  // - the user(s) of the node can perform an shl
13778 
13779  // No shifted operands for 16-bit instructions.
13780  if (ST->isThumb() && ST->isThumb1Only())
13781  return SDValue();
13782 
13783  // Check that all the users could perform the shl themselves.
13784  for (auto *U : N->uses()) {
13785  switch(U->getOpcode()) {
13786  default:
13787  return SDValue();
13788  case ISD::SUB:
13789  case ISD::ADD:
13790  case ISD::AND:
13791  case ISD::OR:
13792  case ISD::XOR:
13793  case ISD::SETCC:
13794  case ARMISD::CMP:
13795  // Check that the user isn't already using a constant because there
13796  // aren't any instructions that support an immediate operand and a
13797  // shifted operand.
13798  if (isa<ConstantSDNode>(U->getOperand(0)) ||
13799  isa<ConstantSDNode>(U->getOperand(1)))
13800  return SDValue();
13801 
13802  // Check that it's not already using a shift.
13803  if (U->getOperand(0).getOpcode() == ISD::SHL ||
13804  U->getOperand(1).getOpcode() == ISD::SHL)
13805  return SDValue();
13806  break;
13807  }
13808  }
13809 
// Only the four shl-distributable binops are handled, and the node's first
// operand must itself be the shl.
13810  if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13811  N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13812  return SDValue();
13813 
13814  if (N->getOperand(0).getOpcode() != ISD::SHL)
13815  return SDValue();
13816 
13817  SDValue SHL = N->getOperand(0);
13818 
13819  auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13820  auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13821  if (!C1ShlC2 || !C2)
13822  return SDValue();
13823 
13824  APInt C2Int = C2->getAPIntValue();
13825  APInt C1Int = C1ShlC2->getAPIntValue();
13826  unsigned C2Width = C2Int.getBitWidth();
// An out-of-range shift amount is undefined; bail out.
13827  if (C2Int.uge(C2Width))
13828  return SDValue();
13829  uint64_t C2Value = C2Int.getZExtValue();
13830 
13831  // Check that performing a lshr will not lose any information.
13832  APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13833  if ((C1Int & Mask) != C1Int)
13834  return SDValue();
13835 
13836  // Shift the first constant.
13837  C1Int.lshrInPlace(C2Int);
13838 
13839  // The immediates are encoded as an 8-bit value that can be rotated.
13840  auto LargeImm = [](const APInt &Imm) {
13841  unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
13842  return Imm.getBitWidth() - Zeros > 8;
13843  };
13844 
13845  if (LargeImm(C1Int) || LargeImm(C2Int))
13846  return SDValue();
13847 
13848  SelectionDAG &DAG = DCI.DAG;
13849  SDLoc dl(N);
13850  SDValue X = SHL.getOperand(0);
13851  SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
13852  DAG.getConstant(C1Int, dl, MVT::i32));
13853  // Shift left to compensate for the lshr of C1Int.
13854  SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
13855 
13856  LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
13857  SHL.dump(); N->dump());
13858  LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
13859  return Res;
13860 }
13861 
13862 
13863 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
13864 ///
// NOTE(review): the signature lines (13865-13866, presumably
// "static SDValue PerformADDCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,") are missing from this capture.
13867  const ARMSubtarget *Subtarget) {
13868  SDValue N0 = N->getOperand(0);
13869  SDValue N1 = N->getOperand(1);
13870 
13871  // Only works one way, because it needs an immediate operand.
13872  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
13873  return Result;
13874 
13875  if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
13876  return Result;
13877 
13878  // First try with the default operand order.
13879  if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
13880  return Result;
13881 
13882  // If that didn't work, try again with the operands commuted.
13883  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
13884 }
13885 
13886 // Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
13887 // providing -X is as cheap as X (currently, just a constant).
// NOTE(review): the signature line (13888, presumably
// "static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {") is
// missing from this capture.
13889  if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
13890  return SDValue();
13891  SDValue CSINC = N->getOperand(1);
13892  if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
13893  return SDValue();
13894 
// Only when the CSINC's first operand is a constant, so 0 - X folds.
13895  ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
13896  if (!X)
13897  return SDValue();
13898 
13899  return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
13900  DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
13901  CSINC.getOperand(0)),
13902  CSINC.getOperand(1), CSINC.getOperand(2),
13903  CSINC.getOperand(3));
13904 }
13905 
13906 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
13907 ///
// NOTE(review): the signature lines (13908-13909, presumably
// "static SDValue PerformSUBCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,") are missing from this capture.
13910  const ARMSubtarget *Subtarget) {
13911  SDValue N0 = N->getOperand(0);
13912  SDValue N1 = N->getOperand(1);
13913 
13914  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
13915  if (N1.getNode()->hasOneUse())
13916  if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
13917  return Result;
13918 
13919  if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
13920  return R;
13921 
// The remaining folds are MVE vector-only.
13922  if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
13923  return SDValue();
13924 
13925  // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
13926  // so that we can readily pattern match more mve instructions which can use
13927  // a scalar operand.
13928  SDValue VDup = N->getOperand(1);
13929  if (VDup->getOpcode() != ARMISD::VDUP)
13930  return SDValue();
13931 
13932  SDValue VMov = N->getOperand(0);
// Look through a bitcast in front of the zero VMOVIMM.
13933  if (VMov->getOpcode() == ISD::BITCAST)
13934  VMov = VMov->getOperand(0);
13935 
13936  if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
13937  return SDValue();
13938 
// Negate the scalar, then splat, instead of splatting then subtracting.
13939  SDLoc dl(N);
13940  SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
13941  DCI.DAG.getConstant(0, dl, MVT::i32),
13942  VDup->getOperand(0));
13943  return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
13944 }
13945 
/// PerformVMULCombine
/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
/// special multiplier accumulator forwarding.
///   vmul d3, d0, d2
///   vmla d3, d1, d2
/// is faster than
///   vadd d3, d0, d1
///   vmul d3, d3, d2
// However, for (A + B) * (A + B),
//   vadd d2, d0, d1
//   vmul d3, d0, d2
//   vmla d3, d1, d2
// is slower than
//   vadd d2, d0, d1
//   vmul d3, d2, d2
                                 const ARMSubtarget *Subtarget) {
  // Only profitable on cores with VMLx forwarding.
  if (!Subtarget->hasVMLxForwarding())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Find which multiply operand is the add/sub; swap so N0 is the add/sub
  // and N1 is the common multiplicand.
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
    Opcode = N1.getOpcode();
    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
        Opcode != ISD::FADD && Opcode != ISD::FSUB)
      return SDValue();
    std::swap(N0, N1);
  }

  // (A + B) * (A + B) is slower when distributed -- see the note above.
  if (N0 == N1)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue N00 = N0->getOperand(0);
  SDValue N01 = N0->getOperand(1);
  // (N00 op N01) * N1 -> (N00 * N1) op (N01 * N1).
  return DAG.getNode(Opcode, DL, VT,
                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
}
13991 
                                            const ARMSubtarget *Subtarget) {
  // Turn a v2i64 multiply of 32-bit sign/zero-extended operands into an MVE
  // VMULLs/VMULLu of the v4i32 sources.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v2i64)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Returns the 32-bit source if Op is a sign_extend_inreg from i32,
  // otherwise a null SDValue.
  auto IsSignExt = [&](SDValue Op) {
    if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return SDValue();
    EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
    if (VT.getScalarSizeInBits() == 32)
      return Op->getOperand(0);
    return SDValue();
  };
  auto IsZeroExt = [&](SDValue Op) {
    // Zero extends are a little more awkward. At the point we are matching
    // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
    // That might be before or after a bitcast depending on how the and is
    // placed. Because this has to look through bitcasts, it is currently only
    // supported on LE.
    if (!Subtarget->isLittle())
      return SDValue();

    SDValue And = Op;
    if (And->getOpcode() == ISD::BITCAST)
      And = And->getOperand(0);
    if (And->getOpcode() != ISD::AND)
      return SDValue();
    SDValue Mask = And->getOperand(1);
    if (Mask->getOpcode() == ISD::BITCAST)
      Mask = Mask->getOperand(0);

    if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
        Mask.getValueType() != MVT::v4i32)
      return SDValue();
    // The mask must clear the top half of each 64-bit lane: (-1, 0, -1, 0).
    if (isAllOnesConstant(Mask->getOperand(0)) &&
        isNullConstant(Mask->getOperand(1)) &&
        isAllOnesConstant(Mask->getOperand(2)) &&
        isNullConstant(Mask->getOperand(3)))
      return And->getOperand(0);
    return SDValue();
  };

  SDLoc dl(N);
  if (SDValue Op0 = IsSignExt(N0)) {
    if (SDValue Op1 = IsSignExt(N1)) {
      SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
      SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
      return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
    }
  }
  if (SDValue Op0 = IsZeroExt(N0)) {
    if (SDValue Op1 = IsZeroExt(N1)) {
      SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
      SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
      return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
    }
  }

  return SDValue();
}
14056 
                                const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  EVT VT = N->getValueType(0);
  if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
    return PerformMVEVMULLCombine(N, DAG, Subtarget);

  if (Subtarget->isThumb1Only())
    return SDValue();

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  if (VT.is64BitVector() || VT.is128BitVector())
    return PerformVMULCombine(N, DCI, Subtarget);
  if (VT != MVT::i32)
    return SDValue();

  // From here on: decompose i32 mul-by-constant into shift/add/sub.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();

  int64_t MulAmt = C->getSExtValue();
  unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);

  // Peel off any power-of-two factor; it becomes a final left shift.
  ShiftAmt = ShiftAmt & (32 - 1);
  SDValue V = N->getOperand(0);
  SDLoc DL(N);

  SDValue Res;
  MulAmt >>= ShiftAmt;

  if (MulAmt >= 0) {
    if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
      // (mul x, 2^N + 1) => (add (shl x, N), x)
      Res = DAG.getNode(ISD::ADD, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmt - 1), DL,
                                                    MVT::i32)));
    } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
      // (mul x, 2^N - 1) => (sub (shl x, N), x)
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmt + 1), DL,
                                                    MVT::i32)),
                        V);
    } else
      return SDValue();
  } else {
    uint64_t MulAmtAbs = -MulAmt;
    if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
                                                    MVT::i32)));
    } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
      Res = DAG.getNode(ISD::ADD, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
                                                    MVT::i32)));
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        DAG.getConstant(0, DL, MVT::i32), Res);
    } else
      return SDValue();
  }

  // Re-apply the peeled power-of-two factor.
  if (ShiftAmt != 0)
    Res = DAG.getNode(ISD::SHL, DL, VT,
                      Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));

  // Do not add new nodes to DAG combiner worklist.
  DCI.CombineTo(N, Res, false);
  return SDValue();
}
14142 
                                   const ARMSubtarget *Subtarget) {
  // Rewrite "(and (shl/srl x, C2), C1)" with a constant mask C1 into a pair
  // of shifts, to save materializing C1. Only reached for Thumb1 (see
  // PerformANDCombine).
  // Allow DAGCombine to pattern-match before we touch the canonical form.
  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  if (N->getValueType(0) != MVT::i32)
    return SDValue();

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!N1C)
    return SDValue();

  uint32_t C1 = (uint32_t)N1C->getZExtValue();
  // Don't transform uxtb/uxth.
  if (C1 == 255 || C1 == 65535)
    return SDValue();

  SDNode *N0 = N->getOperand(0).getNode();
  if (!N0->hasOneUse())
    return SDValue();

  if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
    return SDValue();

  bool LeftShift = N0->getOpcode() == ISD::SHL;

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  if (!N01C)
    return SDValue();

  uint32_t C2 = (uint32_t)N01C->getZExtValue();
  if (!C2 || C2 >= 32)
    return SDValue();

  // Clear irrelevant bits in the mask.
  if (LeftShift)
    C1 &= (-1U << C2);
  else
    C1 &= (-1U >> C2);

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // We have a pattern of the form "(and (shl x, c2) c1)" or
  // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
  // transform to a pair of shifts, to save materializing c1.

  // First pattern: right shift, then mask off leading bits.
  // FIXME: Use demanded bits?
  if (!LeftShift && isMask_32(C1)) {
    if (C2 < C3) {
      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C3 - C2, DL, MVT::i32));
      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // First pattern, reversed: left shift, then mask off trailing bits.
  if (LeftShift && isMask_32(~C1)) {
    if (C2 < C3) {
      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C3 - C2, DL, MVT::i32));
      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // Second pattern: left shift, then mask off leading bits.
  // FIXME: Use demanded bits?
  if (LeftShift && isShiftedMask_32(C1)) {
    uint32_t Trailing = llvm::countr_zero(C1);
    if (Trailing == C2 && C2 + C3 < 32) {
      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C2 + C3, DL, MVT::i32));
      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // Second pattern, reversed: right shift, then mask off trailing bits.
  // FIXME: Handle other patterns of known/demanded bits.
  if (!LeftShift && isShiftedMask_32(C1)) {
    uint32_t Leading = llvm::countl_zero(C1);
    if (Leading == C2 && C2 + C3 < 32) {
      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C2 + C3, DL, MVT::i32));
      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // FIXME: Transform "(and (shl x, c2) c1)" ->
  //        "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
  //        c1.
  return SDValue();
}
14246 
                                 const ARMSubtarget *Subtarget) {
  // Attempt to use immediate-form VBIC
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  // Bail on illegal types and on MVE predicate types.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
      VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
    return SDValue();

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
        SplatBitSize == 64) {
      EVT VbicVT;
      // VBIC clears bits, so check whether ~SplatBits fits a VMOV-style
      // modified immediate.
      SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
                                      SplatUndef.getZExtValue(), SplatBitSize,
                                      DAG, dl, VbicVT, VT, OtherModImm);
      if (Val.getNode()) {
        SDValue Input =
          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
      }
    }
  }

  if (!Subtarget->isThumb1Only()) {
    // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
    if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
      return Result;

    if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
      return Result;
  }

  // Thumb1-only: fold and-of-shift into a cheaper shift pair.
  if (Subtarget->isThumb1Only())
    if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
      return Result;

  return SDValue();
}
14295 
// Try combining OR nodes to SMULWB, SMULWT.
                                         const ARMSubtarget *Subtarget) {
  // SMULW[B|T] needs DSP-class multipliers: ARMv6+ in ARM state, or
  // Thumb2 with DSP.
  if (!Subtarget->hasV6Ops() ||
      (Subtarget->isThumb() &&
       (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
    return SDValue();

  SDValue SRL = OR->getOperand(0);
  SDValue SHL = OR->getOperand(1);

  // OR is commutative; try the other operand order.
  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
    SRL = OR->getOperand(1);
    SHL = OR->getOperand(0);
  }
  if (!isSRL16(SRL) || !isSHL16(SHL))
    return SDValue();

  // The first operands to the shifts need to be the two results from the
  // same smul_lohi node.
  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
      SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
    return SDValue();

  // SRL must consume the high half (result 0 is lo, result 1 is hi).
  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
      SHL.getOperand(0) != SDValue(SMULLOHI, 1))
    return SDValue();

  // Now we have:
  // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
  // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
  // For SMULWB the 16-bit value will be sign extended somehow.
  // For SMULWT only the SRA is required.
  // Check both sides of SMUL_LOHI
  SDValue OpS16 = SMULLOHI->getOperand(0);
  SDValue OpS32 = SMULLOHI->getOperand(1);

  SelectionDAG &DAG = DCI.DAG;
  if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
    OpS16 = OpS32;
    OpS32 = SMULLOHI->getOperand(0);
  }

  SDLoc dl(OR);
  unsigned Opcode = 0;
  if (isS16(OpS16, DAG))
    Opcode = ARMISD::SMULWB;
  else if (isSRA16(OpS16)) {
    Opcode = ARMISD::SMULWT;
    // SMULWT reads the top half itself; drop the explicit SRA.
    OpS16 = OpS16->getOperand(0);
  }
  else
    return SDValue();

  SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
  DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
  return SDValue(OR, 0);
}
14356 
                                       const ARMSubtarget *Subtarget) {
  // BFI is only available on V6T2+
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  // 1) or (and A, mask), val => ARMbfi A, val, mask
  //      iff (val & mask) == val
  //
  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
  //          && mask == ~mask2
  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
  //          && ~mask == mask2
  //  (i.e., copy a bitfield value into another bitfield of the same width)

  if (VT != MVT::i32)
    return SDValue();

  SDValue N00 = N0.getOperand(0);

  // The value and the mask need to be constants so we can verify this is
  // actually a bitfield set. If the mask is 0xffff, we can do better
  // via a movt instruction, so don't use BFI in that case.
  SDValue MaskOp = N0.getOperand(1);
  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
  if (!MaskC)
    return SDValue();
  unsigned Mask = MaskC->getZExtValue();
  if (Mask == 0xffff)
    return SDValue();
  SDValue Res;
  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N1C) {
    unsigned Val = N1C->getZExtValue();
    if ((Val & ~Mask) != Val)
      return SDValue();

      // Shift the inserted value down to the field's bit position.
      Val >>= llvm::countr_zero(~Mask);

      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
                        DAG.getConstant(Val, DL, MVT::i32),
                        DAG.getConstant(Mask, DL, MVT::i32));

      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner that N is
      // now dead.
      return SDValue(N, 0);
    }
  } else if (N1.getOpcode() == ISD::AND) {
    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C)
      return SDValue();
    unsigned Mask2 = N11C->getZExtValue();

    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
    // as is to match.
        (Mask == ~Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasDSP() &&
          (Mask == 0xffff || Mask == 0xffff0000))
        return SDValue();
      // 2a
      unsigned amt = llvm::countr_zero(Mask2);
      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
                        DAG.getConstant(amt, DL, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
                        DAG.getConstant(Mask, DL, MVT::i32));
      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner that N is
      // now dead.
      return SDValue(N, 0);
    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
               (~Mask == Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasDSP() &&
          (Mask2 == 0xffff || Mask2 == 0xffff0000))
        return SDValue();
      // 2b
      unsigned lsb = llvm::countr_zero(Mask);
      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
                        DAG.getConstant(lsb, DL, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
                        DAG.getConstant(Mask2, DL, MVT::i32));
      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner that N is
      // now dead.
      return SDValue(N, 0);
    }
  }

  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
    // where lsb(mask) == #shamt and masked bits of B are known zero.
    SDValue ShAmt = N00.getOperand(1);
    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    unsigned LSB = llvm::countr_zero(Mask);
    if (ShAmtC != LSB)
      return SDValue();

    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
                      DAG.getConstant(~Mask, DL, MVT::i32));

    DCI.CombineTo(N, Res, false);
    // Return value from the original node to inform the combiner that N is
    // now dead.
    return SDValue(N, 0);
  }

  return SDValue();
}
14482 
14483 static bool isValidMVECond(unsigned CC, bool IsFloat) {
14484  switch (CC) {
14485  case ARMCC::EQ:
14486  case ARMCC::NE:
14487  case ARMCC::LE:
14488  case ARMCC::GT:
14489  case ARMCC::GE:
14490  case ARMCC::LT:
14491  return true;
14492  case ARMCC::HS:
14493  case ARMCC::HI:
14494  return !IsFloat;
14495  default:
14496  return false;
14497  };
14498 }
14499 
  // Extract the ARMCC condition operand from a VCMP (operand 2) or a
  // VCMPZ (operand 1). Asserts on any other node.
  if (N->getOpcode() == ARMISD::VCMP)
    return (ARMCC::CondCodes)N->getConstantOperandVal(2);
  else if (N->getOpcode() == ARMISD::VCMPZ)
    return (ARMCC::CondCodes)N->getConstantOperandVal(1);
  else
    llvm_unreachable("Not a VCMP/VCMPZ!");
}
14508 
  // The compare is invertible iff the condition code has a valid MVE
  // encoding for the operand's type (FP vs integer).
  return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
}
14513 
                                      const ARMSubtarget *Subtarget) {
  // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
  // together with predicates
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // A VCMP/VCMPZ whose condition can be flipped costs nothing to invert.
  auto IsFreelyInvertable = [&](SDValue V) {
    if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
      return CanInvertMVEVCMP(V);
    return false;
  };

  // At least one operand must be freely invertable.
  if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
    return SDValue();

  // De Morgan: or A, B == ~(and ~A, ~B).
  SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
  SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
  return DAG.getLogicalNOT(DL, And, VT);
}
14538 
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
                                const ARMSubtarget *Subtarget) {
  // Attempt to use immediate-form VORR
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  // MVE predicate ORs get the dedicated i1 combine.
  if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
                                        VT == MVT::v8i1 || VT == MVT::v16i1))
    return PerformORCombine_i1(N, DAG, Subtarget);

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
        SplatBitSize == 64) {
      EVT VorrVT;
      SDValue Val =
          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
                            SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
      if (Val.getNode()) {
        SDValue Input =
          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
      }
    }
  }

  if (!Subtarget->isThumb1Only()) {
    // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
    if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
      return Result;
    if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
      return Result;
  }

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {

    // The code below optimizes (or (and X, Y), Z).
    // The AND operand needs to have a single user to make these optimizations
    // profitable.
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
      return SDValue();

    APInt SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;

    APInt SplatBits0, SplatBits1;
    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
    BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
    // Ensure that the second operand of both ands are constants
    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
                                      HasAnyUndefs) && !HasAnyUndefs) {
        if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
                                          HasAnyUndefs) && !HasAnyUndefs) {
            // Ensure that the bit width of the constants are the same and that
            // the splat arguments are logical inverses as per the pattern we
            // are trying to simplify.
            if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
                SplatBits0 == ~SplatBits1) {
                // Canonicalize the vector type to make instruction selection
                // simpler.
                EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
                SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
                                             N0->getOperand(1),
                                             N0->getOperand(0),
                                             N1->getOperand(0));
                return DAG.getNode(ISD::BITCAST, dl, VT, Result);
            }
        }
    }
  }

  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
  // reasonable.
  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
    if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
      return Res;
  }

  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
    return Result;

  return SDValue();
}
14639 
                                 const ARMSubtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  if (!Subtarget->isThumb1Only()) {
    // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
    if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
      return Result;

    if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
      return Result;
  }

  if (Subtarget->hasMVEIntegerOps()) {
    // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    const TargetLowering *TLI = Subtarget->getTargetLowering();
    if (TLI->isConstTrueVal(N1) &&
        (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
      if (CanInvertMVEVCMP(N0)) {
        SDLoc DL(N0);

        // Rebuild the compare with the inverted condition code; VCMPZ has
        // no second data operand.
        Ops.push_back(N0->getOperand(0));
        if (N0->getOpcode() == ARMISD::VCMP)
          Ops.push_back(N0->getOperand(1));
        Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
        return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
      }
    }
  }

  return SDValue();
}
14681 
// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
// their position in "to" (Rd).
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
  assert(N->getOpcode() == ARMISD::BFI);
  SDValue From = N->getOperand(1);
  // Operand 2 stores the inverse of the insertion mask.
  ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
  FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());

  // If the Base came from a SHR #C, we can deduce that it is really testing bit
  // #C in the base of the SHR.
  if (From->getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(From->getOperand(1))) {
    APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
    assert(Shift.getLimitedValue() < 32 && "Shift too large!");
    FromMask <<= Shift.getLimitedValue(31);
    From = From->getOperand(0);
  }

  return From;
}
14704 
14705 // If A and B contain one contiguous set of bits, does A | B == A . B?
14706 //
14707 // Neither A nor B must be zero.
14708 static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14709  unsigned LastActiveBitInA = A.countTrailingZeros();
14710  unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
14711  return LastActiveBitInA - 1 == FirstActiveBitInB;
14712 }
14713 
  // We have a BFI in N. Find a BFI it can combine with, if one exists.
  APInt ToMask, FromMask;
  SDValue From = ParseBFI(N, ToMask, FromMask);
  SDValue To = N->getOperand(0);

  // The candidate must be the BFI that N inserts into.
  SDValue V = To;
  if (V.getOpcode() != ARMISD::BFI)
    return SDValue();

  // Both BFIs must draw their bits from the same source value.
  APInt NewToMask, NewFromMask;
  SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
  if (NewFrom != From)
    return SDValue();

  // Do the written bits conflict with any we've seen so far?
  if ((NewToMask & ToMask).getBoolValue())
    // Conflicting bits.
    return SDValue();

  // Are the new bits contiguous when combined with the old bits?
  if (BitsProperlyConcatenate(ToMask, NewToMask) &&
      BitsProperlyConcatenate(FromMask, NewFromMask))
    return V;
  if (BitsProperlyConcatenate(NewToMask, ToMask) &&
      BitsProperlyConcatenate(NewFromMask, FromMask))
    return V;

  return SDValue();
}
14744 
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  if (N1.getOpcode() == ISD::AND) {
    // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
    // the bits being cleared by the AND are not demanded by the BFI.
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C)
      return SDValue();
    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
    unsigned LSB = llvm::countr_zero(~InvMask);
    unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
    assert(Width <
               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
           "undefined behavior");
    unsigned Mask = (1u << Width) - 1;
    unsigned Mask2 = N11C->getZExtValue();
    // The BFI only demands the low Width bits; drop the AND if it keeps them.
    if ((Mask & (~Mask2)) == 0)
      return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
                         N->getOperand(0), N1.getOperand(0), N->getOperand(2));
    return SDValue();
  }

  // Look for another BFI to combine with.
  if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
    // We've found a BFI.
    APInt ToMask1, FromMask1;
    SDValue From1 = ParseBFI(N, ToMask1, FromMask1);

    APInt ToMask2, FromMask2;
    SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
    assert(From1 == From2);
    (void)From2;

    // Create a new BFI, combining the two together.
    APInt NewFromMask = FromMask1 | FromMask2;
    APInt NewToMask = ToMask1 | ToMask2;

    EVT VT = N->getValueType(0);
    SDLoc dl(N);

    // Align the combined source field down to bit 0 if needed.
    if (NewFromMask[0] == 0)
      From1 = DAG.getNode(
          ISD::SRL, dl, VT, From1,
          DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
    return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
                       DAG.getConstant(~NewToMask, dl, VT));
  }

  // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
  // that lower bit insertions are performed first, providing that M1 and M2
  // do no overlap. This can allow multiple BFI instructions to be combined
  // together by the other folds above.
  if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
    APInt ToMask1 = ~N->getConstantOperandAPInt(2);
    APInt ToMask2 = ~N0.getConstantOperandAPInt(2);

    if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
        ToMask1.countLeadingZeros() < ToMask2.countLeadingZeros())
      return SDValue();

    EVT VT = N->getValueType(0);
    SDLoc dl(N);
    SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
                               N->getOperand(1), N->getOperand(2));
    return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
                       N0.getOperand(2));
  }

  return SDValue();
}
14817 
// Check that N is CMPZ(CSINC(0, 0, CC, X)),
// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
// return X if valid.
  if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
    return SDValue();
  SDValue CSInc = Cmp->getOperand(0);

  // Ignore any `And 1` nodes that may not yet have been removed. We are
  // looking for a value that produces 1/0, so these have no effect on the
  // code.
  while (CSInc.getOpcode() == ISD::AND &&
         isa<ConstantSDNode>(CSInc.getOperand(1)) &&
         CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
    CSInc = CSInc.getOperand(0);

  // CSINC 0, 0, CC, X: the glue/chain input X carries the real flags.
  if (CSInc.getOpcode() == ARMISD::CSINC &&
      isNullConstant(CSInc.getOperand(0)) &&
      isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
    return CSInc.getOperand(3);
  }
  // CMOV 1, 0, CC, $cpsr, X.
  if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
      isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
    return CSInc.getOperand(4);
  }
  // CMOV 0, 1, CC, $cpsr, X (operands reversed).
  if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
      isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
    return CSInc.getOperand(4);
  }
  return SDValue();
}
14853 
14855  // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
14856  // t92: glue = ARMISD::CMPZ t74, 0
14857  // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
14858  // t96: glue = ARMISD::CMPZ t93, 0
14859  // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
// Fold an unnecessary re-compare of a 1/0 value: if IsCMPZCSINC recognises
// the pattern and the inner condition is EQ, the original flags value C can
// replace the whole CMPZ. (The declaration of the local condition-code
// variable, original line 14860, is not visible in this excerpt.)
14861  if (SDValue C = IsCMPZCSINC(N, Cond))
14862  if (Cond == ARMCC::EQ)
14863  return C;
14864  return SDValue();
14865 }
14866 
14867  // Fold away an unneccessary CMPZ/CSINC
14868  // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
14869  // if C1==EQ -> CSXYZ A, B, C2, D
14870  // if C1==NE -> CSXYZ A, B, NOT(C2), D
// Operand 3 of the CSEL-family node is its flags input; operand 2 its
// condition code. If the flags come from a compare of a 1/0 boolean, rewire
// the node to consume the original flags D directly.
// NOTE(review): the function signature and the NE-branch's condition operand
// (original lines 14872, 14882) are missing from this excerpt — the NE case
// presumably passes the inverted condition; verify against the full source.
14873  if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
14874  if (N->getConstantOperandVal(2) == ARMCC::EQ)
14875  return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
14876  N->getOperand(1),
14877  DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
14878  if (N->getConstantOperandVal(2) == ARMCC::NE)
14879  return DAG.getNode(
14880  N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
14881  N->getOperand(1),
14883  }
14884  return SDValue();
14885 }
14886 
14887 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
14888 /// ARMISD::VMOVRRD.
14891  const ARMSubtarget *Subtarget) {
14892  // vmovrrd(vmovdrr x, y) -> x,y
14893  SDValue InDouble = N->getOperand(0);
14894  if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
14895  return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
14896 
14897  // vmovrrd(load f64) -> (load i32), (load i32)
14898  SDNode *InNode = InDouble.getNode();
14899  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
14900  InNode->getValueType(0) == MVT::f64 &&
14901  InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
14902  !cast<LoadSDNode>(InNode)->isVolatile()) {
14903  // TODO: Should this be done for non-FrameIndex operands?
14904  LoadSDNode *LD = cast<LoadSDNode>(InNode);
14905 
14906  SelectionDAG &DAG = DCI.DAG;
14907  SDLoc DL(LD);
14908  SDValue BasePtr = LD->getBasePtr();
// Split the f64 load into two i32 loads at offsets 0 and 4, keeping the
// original pointer info/flags; the second load's alignment is the common
// alignment of the original alignment and the 4-byte offset.
14909  SDValue NewLD1 =
14910  DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
14911  LD->getAlign(), LD->getMemOperand()->getFlags())
14912 
14913  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
14914  DAG.getConstant(4, DL, MVT::i32));
14915 
14916  SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
14917  LD->getPointerInfo().getWithOffset(4),
14918  commonAlignment(LD->getAlign(), 4),
14919  LD->getMemOperand()->getFlags());
14920 
// Redirect the old load's chain users to the new second load, then swap the
// halves on big-endian targets so result 0 stays the low word.
14921  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
14922  if (DCI.DAG.getDataLayout().isBigEndian())
14923  std::swap (NewLD1, NewLD2);
14924  SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
14925  return Result;
14926  }
14927 
14928  // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
14929  // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
14930  if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14931  isa<ConstantSDNode>(InDouble.getOperand(1))) {
14932  SDValue BV = InDouble.getOperand(0);
14933  // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
14934  // change lane order under big endian.
// NOTE(review): the second opcode of this while-condition (original line
// 14938, presumably ARMISD::VECTOR_REG_CAST) is missing from this excerpt.
14935  bool BVSwap = BV.getOpcode() == ISD::BITCAST;
14936  while (
14937  (BV.getOpcode() == ISD::BITCAST ||
14939  (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
14940  BVSwap = BV.getOpcode() == ISD::BITCAST;
14941  BV = BV.getOperand(0);
14942  }
14943  if (BV.getValueType() != MVT::v4i32)
14944  return SDValue();
14945 
14946  // Handle buildvectors, pulling out the correct lane depending on
14947  // endianness.
// Extract index 1 selects the high f64 half, i.e. v4i32 lanes 2 and 3.
14948  unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
14949  if (BV.getOpcode() == ISD::BUILD_VECTOR) {
14950  SDValue Op0 = BV.getOperand(Offset);
14951  SDValue Op1 = BV.getOperand(Offset + 1);
14952  if (!Subtarget->isLittle() && BVSwap)
14953  std::swap(Op0, Op1);
14954 
14955  return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
14956  }
14957 
14958  // A chain of insert_vectors, grabbing the correct value of the chain of
14959  // inserts.
14960  SDValue Op0, Op1;
14961  while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
14962  if (isa<ConstantSDNode>(BV.getOperand(2))) {
14963  if (BV.getConstantOperandVal(2) == Offset)
14964  Op0 = BV.getOperand(1);
14965  if (BV.getConstantOperandVal(2) == Offset + 1)
14966  Op1 = BV.getOperand(1);
14967  }
14968  BV = BV.getOperand(0);
14969  }
14970  if (!Subtarget->isLittle() && BVSwap)
14971  std::swap(Op0, Op1);
// Only fold when both lanes were found among the inserts.
14972  if (Op0 && Op1)
14973  return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
14974  }
14975 
14976  return SDValue();
14977 }
14978 
14979 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
14980 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
14982  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
14983  SDValue Op0 = N->getOperand(0);
14984  SDValue Op1 = N->getOperand(1);
// Look through bitcasts on both halves so the VMOVRRD pairing below is still
// recognised when the i32 halves were bitcast from another 32-bit type.
14985  if (Op0.getOpcode() == ISD::BITCAST)
14986  Op0 = Op0.getOperand(0);
14987  if (Op1.getOpcode() == ISD::BITCAST)
14988  Op1 = Op1.getOperand(0);
// Fold only when the operands are results 0 and 1 of the *same* VMOVRRD
// node, i.e. the value X is being reassembled exactly as it was split;
// the round trip then reduces to a bitcast of X.
14989  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
14990  Op0.getNode() == Op1.getNode() &&
14991  Op0.getResNo() == 0 && Op1.getResNo() == 1)
14992  return DAG.getNode(ISD::BITCAST, SDLoc(N),
14993  N->getValueType(0), Op0.getOperand(0));
14994  return SDValue();
14995 }
14996 
// Combine for ARMISD::VMOVhr (GPR -> half-precision register move).
// (The function signature, original lines 14997-14998, is not visible in
// this excerpt.)
14999  SDValue Op0 = N->getOperand(0);
15000 
15001  // VMOVhr (VMOVrh (X)) -> X
15002  if (Op0->getOpcode() == ARMISD::VMOVrh)
15003  return Op0->getOperand(0);
15004 
15005  // FullFP16: half values are passed in S-registers, and we don't
15006  // need any of the bitcast and moves:
15007  //
15008  // t2: f32,ch = CopyFromReg t0, Register:f32 %0
15009  // t5: i32 = bitcast t2
15010  // t18: f16 = ARMISD::VMOVhr t5
15011  if (Op0->getOpcode() == ISD::BITCAST) {
15012  SDValue Copy = Op0->getOperand(0);
15013  if (Copy.getValueType() == MVT::f32 &&
15014  Copy->getOpcode() == ISD::CopyFromReg) {
// Re-issue the CopyFromReg directly at the f16 result type, bypassing the
// intermediate f32 bitcast and the VMOVhr.
15015  SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)};
15016  SDValue NewCopy =
15017  DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), N->getValueType(0), Ops);
15018  return NewCopy;
15019  }
15020  }
15021 
15022  // fold (VMOVhr (load x)) -> (load (f16*)x)
15023  if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
// Single-use, unindexed i16 load: load straight into the f16 register,
// and rewire both the value and the chain to the new load.
15024  if (LN0->hasOneUse() && LN0->isUnindexed() &&
15025  LN0->getMemoryVT() == MVT::i16) {
15026  SDValue Load =
15027  DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15028  LN0->getBasePtr(), LN0->getMemOperand());
15029  DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15030  DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15031  return Load;
15032  }
15033  }
15034 
15035  // Only the bottom 16 bits of the source register are used.
15036  APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15037  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15038  if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15039  return SDValue(N, 0);
15040 
15041  return SDValue();
15042 }
15043 
// Combine for ARMISD::VMOVrh (half-precision register -> GPR move).
// (The function signature, original line 15044, is not visible here.)
15045  SDValue N0 = N->getOperand(0);
15046  EVT VT = N->getValueType(0);
15047 
15048  // fold (VMOVrh (fpconst x)) -> const x
// Replace a move of an FP constant with the integer holding its bit pattern.
15049  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15050  APFloat V = C->getValueAPF();
15051  return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15052  }
15053 
15054  // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15055  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15056  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15057 
// Load the 16-bit value zero-extended into the GPR and rewire both the
// value and the old load's chain to the new extending load.
15058  SDValue Load =
15059  DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15060  LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15061  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15062  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15063  return Load;
15064  }
15065 
15066  // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15067  if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15068  isa<ConstantSDNode>(N0->getOperand(1)))
15069  return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15070  N0->getOperand(1));
15071 
15072  return SDValue();
15073 }
15074 
15075 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15076 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
15077 /// i64 vector to have f64 elements, since the value can then be loaded
15078 /// directly into a VFP register.
// (The function signature, original line 15079, is not visible here.)
15080  unsigned NumElts = N->getValueType(0).getVectorNumElements();
15081  for (unsigned i = 0; i < NumElts; ++i) {
15082  SDNode *Elt = N->getOperand(i).getNode();
// One qualifying element is enough to make the bitcast profitable.
15083  if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15084  return true;
15085  }
15086  return false;
15087 }
15088 
15089 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15090 /// ISD::BUILD_VECTOR.
15093  const ARMSubtarget *Subtarget) {
15094  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15095  // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15096  // into a pair of GPRs, which is fine when the value is used as a scalar,
15097  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15098  SelectionDAG &DAG = DCI.DAG;
15099  if (N->getNumOperands() == 2)
15100  if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15101  return RV;
15102 
15103  // Load i64 elements as f64 values so that type legalization does not split
15104  // them up into i32 values.
15105  EVT VT = N->getValueType(0);
// NOTE(review): the profitability guard and the Ops vector declaration
// (original lines 15106 and 15109, presumably checking the i64 element type
// via hasNormalLoadOperand and declaring a SmallVector) are missing from
// this excerpt — confirm against the full source.
15107  return SDValue();
15108  SDLoc dl(N);
15110  unsigned NumElts = VT.getVectorNumElements();
// Bitcast each i64 element to f64 and let the DAGCombiner fold the casts.
15111  for (unsigned i = 0; i < NumElts; ++i) {
15112  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15113  Ops.push_back(V);
15114  // Make the DAGCombiner fold the bitcast.
15115  DCI.AddToWorklist(V.getNode());
15116  }
15117  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15118  SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15119  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15120 }
15121 
15122 /// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15123 static SDValue
15125  // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15126  // At that time, we may have inserted bitcasts from integer to float.
15127  // If these bitcasts have survived DAGCombine, change the lowering of this
15128  // BUILD_VECTOR in something more vector friendly, i.e., that does not
15129  // force to use floating point types.
15130 
15131  // Make sure we can change the type of the vector.
15132  // This is possible iff:
15133  // 1. The vector is only used in a bitcast to a integer type. I.e.,
15134  // 1.1. Vector is used only once.
15135  // 1.2. Use is a bit convert to an integer type.
15136  // 2. The size of its operands are 32-bits (64-bits are not legal).
15137  EVT VT = N->getValueType(0);
15138  EVT EltVT = VT.getVectorElementType();
15139 
15140  // Check 1.1. and 2.
15141  if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15142  return SDValue();
15143 
15144  // By construction, the input type must be float.
15145  assert(EltVT == MVT::f32 && "Unexpected type!");
15146 
15147  // Check 1.2.
15148  SDNode *Use = *N->use_begin();
15149  if (Use->getOpcode() != ISD::BITCAST ||
15150  Use->getValueType(0).isFloatingPoint())
15151  return SDValue();
15152 
15153  // Check profitability.
15154  // Model is, if more than half of the relevant operands are bitcast from
15155  // i32, turn the build_vector into a sequence of insert_vector_elt.
15156  // Relevant operands are everything that is not statically
15157  // (i.e., at compile time) bitcasted.
15158  unsigned NumOfBitCastedElts = 0;
15159  unsigned NumElts = VT.getVectorNumElements();
15160  unsigned NumOfRelevantElts = NumElts;
15161  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15162  SDValue Elt = N->getOperand(Idx);
15163  if (Elt->getOpcode() == ISD::BITCAST) {
15164  // Assume only bit cast to i32 will go away.
15165  if (Elt->getOperand(0).getValueType() == MVT::i32)
15166  ++NumOfBitCastedElts;
15167  } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15168  // Constants are statically casted, thus do not count them as
15169  // relevant operands.
15170  --NumOfRelevantElts;
15171  }
15172 
15173  // Check if more than half of the elements require a non-free bitcast.
15174  if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15175  return SDValue();
15176 
15177  SelectionDAG &DAG = DCI.DAG;
15178  // Create the new vector type.
15179  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15180  // Check if the type is legal.
15181  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15182  if (!TLI.isTypeLegal(VecVT))
15183  return SDValue();
15184 
15185  // Combine:
15186  // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15187  // => BITCAST INSERT_VECTOR_ELT
15188  // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15189  // (BITCAST EN), N.
15190  SDValue Vec = DAG.getUNDEF(VecVT);
15191  SDLoc dl(N);
15192  for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15193  SDValue V = N->getOperand(Idx);
// Undef lanes need no insert; they stay undef in the result vector.
15194  if (V.isUndef())
15195  continue;
15196  if (V.getOpcode() == ISD::BITCAST &&
15197  V->getOperand(0).getValueType() == MVT::i32)
15198  // Fold obvious case.
15199  V = V.getOperand(0);
15200  else {
15201  V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15202  // Make the DAGCombiner fold the bitcasts.
15203  DCI.AddToWorklist(V.getNode());
15204  }
15205  SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15206  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15207  }
15208  Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15209  // Make the DAGCombiner fold the bitcasts.
15210  DCI.AddToWorklist(Vec.getNode());
15211  return Vec;
15212 }
15213 
// Combine for ARMISD::PREDICATE_CAST (MVE predicate <-> i32 casts).
// (The signature continuation, original line 15215, is not visible here.)
15214 static SDValue
15216  EVT VT = N->getValueType(0);
15217  SDValue Op = N->getOperand(0);
15218  SDLoc dl(N);
15219 
15220  // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15221  if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15222  // If the valuetypes are the same, we can remove the cast entirely.
15223  if (Op->getOperand(0).getValueType() == VT)
15224  return Op->getOperand(0);
15225  return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15226  }
15227 
15228  // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15229  // more VPNOT which might get folded as else predicates.
15230  if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15231  SDValue X =
15232  DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
// 65535 = all 16 predicate lanes set, i.e. the all-ones predicate constant.
15233  SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15234  DCI.DAG.getConstant(65535, dl, MVT::i32));
15235  return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15236  }
15237 
15238  // Only the bottom 16 bits of the source register are used.
15239  if (Op.getValueType() == MVT::i32) {
15240  APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15241  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15242  if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15243  return SDValue(N, 0);
15244  }
15245  return SDValue();
15246 }
15247 
// Combine for ARMISD::VECTOR_REG_CAST (reinterpret vector register bits).
// (The first signature line, original line 15248, is not visible here.)
15249  const ARMSubtarget *ST) {
15250  EVT VT = N->getValueType(0);
15251  SDValue Op = N->getOperand(0);
15252  SDLoc dl(N);
15253 
15254  // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15255  if (ST->isLittle())
15256  return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15257 
15258  // VECTOR_REG_CAST undef -> undef
15259  if (Op.isUndef())
15260  return DAG.getUNDEF(VT);
15261 
15262  // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15263  if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15264  // If the valuetypes are the same, we can remove the cast entirely.
15265  if (Op->getOperand(0).getValueType() == VT)
15266  return Op->getOperand(0);
15267  return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15268  }
15269 
15270  return SDValue();
15271 }
15272 
// Combine for ARMISD::VCMP (MVE vector compare). Canonicalises compares
// against zero and against splats so fewer instruction forms are needed.
// (The first signature line, original line 15273, and the start of the Cond
// declaration, original line 15281, are not visible in this excerpt.)
15274  const ARMSubtarget *Subtarget) {
15275  if (!Subtarget->hasMVEIntegerOps())
15276  return SDValue();
15277 
15278  EVT VT = N->getValueType(0);
15279  SDValue Op0 = N->getOperand(0);
15280  SDValue Op1 = N->getOperand(1);
15282  (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
15283  SDLoc dl(N);
15284 
15285  // vcmp X, 0, cc -> vcmpz X, cc
15286  if (isZeroVector(Op1))
15287  return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15288 
// Only swap operands when the swapped condition is representable for this
// element type (integer vs floating point conditions differ on MVE).
15289  unsigned SwappedCond = getSwappedCondition(Cond);
15290  if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15291  // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15292  if (isZeroVector(Op0))
15293  return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15294  DAG.getConstant(SwappedCond, dl, MVT::i32));
15295  // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15296  if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15297  return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15298  DAG.getConstant(SwappedCond, dl, MVT::i32));
15299  }
15300 
15301  return SDValue();
15302 }
15303 
15304 /// PerformInsertEltCombine - Target-specific dag combine xforms for
15305 /// ISD::INSERT_VECTOR_ELT.
15308  // Bitcast an i64 load inserted into a vector to f64.
15309  // Otherwise, the i64 value will be legalized to a pair of i32 values.
15310  EVT VT = N->getValueType(0);
15311  SDNode *Elt = N->getOperand(1).getNode();
// Only worthwhile for i64 elements coming from plain, non-volatile loads.
15312  if (VT.getVectorElementType() != MVT::i64 ||
15313  !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15314  return SDValue();
15315 
15316  SelectionDAG &DAG = DCI.DAG;
15317  SDLoc dl(N);
// Rebuild the insert in the equivalent f64 vector type, then bitcast back.
15318  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15319  VT.getVectorNumElements());
15320  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15321  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15322  // Make the DAGCombiner fold the bitcasts.
15323  DCI.AddToWorklist(Vec.getNode());
15324  DCI.AddToWorklist(V.getNode());
15325  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15326  Vec, V, N->getOperand(2));
15327  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15328 }
15329 
15330 // Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15331 // directly or bitcast to an integer if the original is a float vector.
15332 // extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15333 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15334 static SDValue
// NOTE(review): the signature continuation (original line 15335), the second
// half of the legality guard (15340), and part of the f64 extract
// construction (15385-15387) are missing from this excerpt — confirm against
// the full source before relying on the exact conditions.
15336  EVT VT = N->getValueType(0);
15337  SDLoc dl(N);
15338 
15339  if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15341  return SDValue();
15342 
// Look through an f32->i32 bitcast so float-vector extracts also match.
15343  SDValue Ext = SDValue(N, 0);
15344  if (Ext.getOpcode() == ISD::BITCAST &&
15345  Ext.getOperand(0).getValueType() == MVT::f32)
15346  Ext = Ext.getOperand(0);
15347  if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15348  !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15349  Ext.getConstantOperandVal(1) % 2 != 0)
15350  return SDValue();
// Do not disturb extracts that feed int-to-fp conversions directly; those
// have better lowerings than going through a VMOVRRD.
15351  if (Ext->use_size() == 1 &&
15352  (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15353  Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15354  return SDValue();
15355 
15356  SDValue Op0 = Ext.getOperand(0);
15357  EVT VecVT = Op0.getValueType();
15358  unsigned ResNo = Op0.getResNo();
15359  unsigned Lane = Ext.getConstantOperandVal(1);
15360  if (VecVT.getVectorNumElements() != 4)
15361  return SDValue();
15362 
15363  // Find another extract, of Lane + 1
15364  auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15365  return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15366  isa<ConstantSDNode>(V->getOperand(1)) &&
15367  V->getConstantOperandVal(1) == Lane + 1 &&
15368  V->getOperand(0).getResNo() == ResNo;
15369  });
15370  if (OtherIt == Op0->uses().end())
15371  return SDValue();
15372 
15373  // For float extracts, we need to be converting to a i32 for both vector
15374  // lanes.
15375  SDValue OtherExt(*OtherIt, 0);
15376  if (OtherExt.getValueType() != MVT::i32) {
// The partner extract must feed exactly one i32 bitcast; combine against
// that bitcast so both lanes end up as i32 values.
15377  if (OtherExt->use_size() != 1 ||
15378  OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15379  OtherExt->use_begin()->getValueType(0) != MVT::i32)
15380  return SDValue();
15381  OtherExt = SDValue(*OtherExt->use_begin(), 0);
15382  }
15383 
15384  // Convert the type to a f64 and extract with a VMOVRRD.
15385  SDValue F64 = DCI.DAG.getNode(
15388  DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15389  SDValue VMOVRRD =
15390  DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15391 
// Result 0 replaces this extract; result 1 replaces the Lane+1 partner.
15392  DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15393  return VMOVRRD;
15394 }
15395 
// Combine for ISD::EXTRACT_VECTOR_ELT. (The first signature lines, original
// lines 15396-15397, are not visible in this excerpt.)
15398  const ARMSubtarget *ST) {
15399  SDValue Op0 = N->getOperand(0);
15400  EVT VT = N->getValueType(0);
15401  SDLoc dl(N);
15402 
15403  // extract (vdup x) -> x
15404  if (Op0->getOpcode() == ARMISD::VDUP) {
15405  SDValue X = Op0->getOperand(0);
// Every lane of a VDUP holds X; pick the right move/cast for the result type.
15406  if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15407  return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15408  if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15409  return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15410  if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15411  return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15412 
// Otherwise look through bitcasts on X until the types line up.
15413  while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15414  X = X->getOperand(0);
15415  if (X.getValueType() == VT)
15416  return X;
15417  }
15418 
15419  // extract ARM_BUILD_VECTOR -> x
15420  if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15421  isa<ConstantSDNode>(N->getOperand(1)) &&
15422  N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15423  return Op0.getOperand(N->getConstantOperandVal(1));
15424  }
15425 
15426  // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15427  if (Op0.getValueType() == MVT::v4i32 &&
15428  isa<ConstantSDNode>(N->getOperand(1)) &&
15429  Op0.getOpcode() == ISD::BITCAST &&
15430  Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15431  Op0.getOperand(0).getValueType() == MVT::v2f64) {
15432  SDValue BV = Op0.getOperand(0);
// v4i32 lanes 0-1 map to the first f64 element, lanes 2-3 to the second;
// within an element, endianness decides which VMOVDRR half is which lane.
15433  unsigned Offset = N->getConstantOperandVal(1);
15434  SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15435  if (MOV.getOpcode() == ARMISD::VMOVDRR)
15436  return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15437  }
15438 
15439  // extract x, n; extract x, n+1 -> VMOVRRD x
15440  if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15441  return R;
15442 
15443  // extract (MVETrunc(x)) -> extract x
15444  if (Op0->getOpcode() == ARMISD::MVETRUNC) {
// Map the lane index onto the corresponding MVETRUNC source operand and the
// lane within that operand.
15445  unsigned Idx = N->getConstantOperandVal(1);
15446  unsigned Vec =
15447  Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15448  unsigned SubIdx =
15449  Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15450  return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15451  DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15452  }
15453 
15454  return SDValue();
15455 }
15456 
// Combine for ISD::SIGN_EXTEND_INREG. (The function signature, original
// line 15457, is not visible here.)
15458  SDValue Op = N->getOperand(0);
15459  EVT VT = N->getValueType(0);
15460 
15461  // sext_inreg(VGETLANEu) -> VGETLANEs
// A sign-extend-in-reg of an unsigned lane get is exactly a signed lane get,
// provided the extension width matches the lane's scalar type.
15462  if (Op.getOpcode() == ARMISD::VGETLANEu &&
15463  cast<VTSDNode>(N->getOperand(1))->getVT() ==
15464  Op.getOperand(0).getValueType().getScalarType())
15465  return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15466  Op.getOperand(1));
15467 
15468  return SDValue();
15469 }
15470 
// Combine for ISD::INSERT_SUBVECTOR: turn an aligned half-width insert into
// a CONCAT_VECTORS. (The signature continuation, original line 15472, is not
// visible here.)
15471 static SDValue
15473  SDValue Vec = N->getOperand(0);
15474  SDValue SubVec = N->getOperand(1);
15475  uint64_t IdxVal = N->getConstantOperandVal(2);
15476  EVT VecVT = Vec.getValueType();
15477  EVT SubVT = SubVec.getValueType();
15478 
15479  // Only do this for legal fixed vector types.
15480  if (!VecVT.isFixedLengthVector() ||
15481  !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15482  !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15483  return SDValue();
15484 
15485  // Ignore widening patterns.
15486  if (IdxVal == 0 && Vec.isUndef())
15487  return SDValue();
15488 
15489  // Subvector must be half the width and an "aligned" insertion.
15490  unsigned NumSubElts = SubVT.getVectorNumElements();
15491  if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15492  (IdxVal != 0 && IdxVal != NumSubElts))
15493  return SDValue();
15494 
15495  // Fold insert_subvector -> concat_vectors
15496  // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15497  // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15498  SDLoc DL(N);
15499  SDValue Lo, Hi;
15500  if (IdxVal == 0) {
15501  Lo = SubVec;
15502  Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15503  DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15504  } else {
15505  Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15506  DCI.DAG.getVectorIdxConstant(0, DL));
15507  Hi = SubVec;
15508  }
15509  return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15510 }
15511 
15512 // shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
// (The first half of the signature, original line 15513, is not visible.)
15514  SelectionDAG &DAG) {
15515  SDValue Trunc = N->getOperand(0);
15516  EVT VT = Trunc.getValueType();
15517  if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15518  return SDValue();
15519 
// A shuffle interleaving the two MVETRUNC halves is exactly a VMOVN; the
// mask direction decides which source supplies the even/odd lanes.
15520  SDLoc DL(Trunc);
15521  if (isVMOVNTruncMask(N->getMask(), VT, false))
15522  return DAG.getNode(
15523  ARMISD::VMOVN, DL, VT,
15524  DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15525  DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15526  DAG.getConstant(1, DL, MVT::i32));
15527  else if (isVMOVNTruncMask(N->getMask(), VT, true))
15528  return DAG.getNode(
15529  ARMISD::VMOVN, DL, VT,
15530  DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15531  DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15532  DAG.getConstant(1, DL, MVT::i32));
15533  return SDValue();
15534 }
15535 
15536 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15537 /// ISD::VECTOR_SHUFFLE.
// (The function signature, original line 15538, is not visible here.)
15539  if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15540  return R;
15541 
15542  // The LLVM shufflevector instruction does not require the shuffle mask
15543  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15544  // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15545  // operands do not match the mask length, they are extended by concatenating
15546  // them with undef vectors. That is probably the right thing for other
15547  // targets, but for NEON it is better to concatenate two double-register
15548  // size vector operands into a single quad-register size vector. Do that
15549  // transformation here:
15550  // shuffle(concat(v1, undef), concat(v2, undef)) ->
15551  // shuffle(concat(v1, v2), undef)
15552  SDValue Op0 = N->getOperand(0);
15553  SDValue Op1 = N->getOperand(1);
15554  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15555  Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15556  Op0.getNumOperands() != 2 ||
15557  Op1.getNumOperands() != 2)
15558  return SDValue();
15559  SDValue Concat0Op1 = Op0.getOperand(1);
15560  SDValue Concat1Op1 = Op1.getOperand(1);
// Both concats must pad with undef in their upper half for the fold to hold.
15561  if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15562  return SDValue();
15563  // Skip the transformation if any of the types are illegal.
15564  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15565  EVT VT = N->getValueType(0);
15566  if (!TLI.isTypeLegal(VT) ||
15567  !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15568  !TLI.isTypeLegal(Concat1Op1.getValueType()))
15569  return SDValue();
15570 
15571  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15572  Op0.getOperand(0), Op1.getOperand(0));
15573  // Translate the shuffle mask.
// Old mask: [0, HalfElts) selects v1, [NumElts, NumElts+HalfElts) selects
// v2. New mask: v2's lanes move down to [HalfElts, NumElts); anything that
// referenced an undef half becomes -1 (undef lane).
15574  SmallVector<int, 16> NewMask;
15575  unsigned NumElts = VT.getVectorNumElements();
15576  unsigned HalfElts = NumElts/2;
15577  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15578  for (unsigned n = 0; n < NumElts; ++n) {
15579  int MaskElt = SVN->getMaskElt(n);
15580  int NewElt = -1;
15581  if (MaskElt < (int)HalfElts)
15582  NewElt = MaskElt;
15583  else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15584  NewElt = HalfElts + MaskElt - NumElts;
15585  NewMask.push_back(NewElt);
15586  }
15587  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15588  DAG.getUNDEF(VT), NewMask);
15589 }
15590 
15591 /// Load/store instruction that can be merged with a base address
15592 /// update
// NOTE(review): the struct header and its first members (original lines
// 15593-15595, presumably `struct BaseUpdateTarget {` with the SDNode
// pointer and an isIntrinsic flag) are missing from this excerpt.
// True for store-style operations, false for loads.
15596  bool isStore;
// Index of the address operand within the load/store node's operand list.
15597  unsigned AddrOpIdx;
15598 };
15599 
// NOTE(review): the struct header (original line 15600, presumably
// `struct BaseUpdateUser {`) and the member declarations following the two
// doc comments (original lines 15602 and 15604) are missing from this
// excerpt — only the ConstInc member is visible.
15601  /// Instruction that updates a pointer
15603  /// Pointer increment operand
15605  /// Pointer increment value if it is a constant, or 0 otherwise
15606  unsigned ConstInc;
15607 };
15608 
15610  struct BaseUpdateUser &User,
15611  bool SimpleConstIncOnly,
15613  SelectionDAG &DAG = DCI.DAG;
15614  SDNode *N = Target.N;
15615  MemSDNode *MemN = cast<MemSDNode>(N);
15616  SDLoc dl(N);
15617 
15618  // Find the new opcode for the updating load/store.
15619  bool isLoadOp = true;
15620  bool isLaneOp = false;
15621  // Workaround for vst1x and vld1x intrinsics which do not have alignment
15622  // as an operand.
15623  bool hasAlignment = true;
15624  unsigned NewOpc = 0;
15625  unsigned NumVecs = 0;
15626  if (Target.isIntrinsic) {
15627  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
15628  switch (IntNo) {
15629  default:
15630  llvm_unreachable("unexpected intrinsic for Neon base update");
15631  case Intrinsic::arm_neon_vld1:
15632  NewOpc = ARMISD::VLD1_UPD;
15633  NumVecs = 1;
15634  break;
15635  case Intrinsic::arm_neon_vld2:
15636  NewOpc = ARMISD::VLD2_UPD;
15637  NumVecs = 2;
15638  break;
15639  case Intrinsic::arm_neon_vld3:
15640  NewOpc = ARMISD::VLD3_UPD;
15641  NumVecs = 3;
15642  break;
15643  case Intrinsic::arm_neon_vld4:
15644  NewOpc = ARMISD::VLD4_UPD;
15645  NumVecs = 4;
15646  break;
15647  case Intrinsic::arm_neon_vld1x2:
15648  NewOpc = ARMISD::VLD1x2_UPD;
15649  NumVecs = 2;
15650  hasAlignment = false;
15651  break;
15652  case Intrinsic::arm_neon_vld1x3:
15653  NewOpc = ARMISD::VLD1x3_UPD;
15654  NumVecs = 3;
15655  hasAlignment = false;
15656  break;
15657  case Intrinsic::arm_neon_vld1x4:
15658  NewOpc = ARMISD::VLD1x4_UPD;
15659  NumVecs = 4;
15660  hasAlignment = false;
15661  break;
15662  case Intrinsic::arm_neon_vld2dup:
15663  NewOpc = ARMISD::VLD2DUP_UPD;
15664  NumVecs = 2;
15665  break;
15666  case Intrinsic::arm_neon_vld3dup:
15667  NewOpc = ARMISD::VLD3DUP_UPD;
15668  NumVecs = 3;
15669  break;
15670  case Intrinsic::arm_neon_vld4dup:
15671  NewOpc = ARMISD::VLD4DUP_UPD;
15672  NumVecs = 4;
15673  break;
15674  case Intrinsic::arm_neon_vld2lane:
15675  NewOpc = ARMISD::VLD2LN_UPD;
15676  NumVecs = 2;
15677  isLaneOp = true;
15678  break;
15679  case Intrinsic::arm_neon_vld3lane:
15680  NewOpc = ARMISD::VLD3LN_UPD;
15681  NumVecs = 3;
15682  isLaneOp = true;
15683  break;
15684  case Intrinsic::arm_neon_vld4lane:
15685  NewOpc = ARMISD::VLD4LN_UPD;
15686  NumVecs = 4;
15687  isLaneOp = true;
15688  break;
15689  case Intrinsic::arm_neon_vst1:
15690  NewOpc = ARMISD::VST1_UPD;
15691  NumVecs = 1;
15692  isLoadOp = false;
15693  break;
15694  case Intrinsic::arm_neon_vst2:
15695  NewOpc = ARMISD::VST2_UPD;
15696  NumVecs = 2;
15697  isLoadOp = false;
15698  break;
15699  case Intrinsic::arm_neon_vst3:
15700  NewOpc = ARMISD::VST3_UPD;
15701  NumVecs = 3;
15702  isLoadOp = false;
15703  break;
15704  case Intrinsic::arm_neon_vst4:
15705  NewOpc = ARMISD::VST4_UPD;
15706  NumVecs = 4;
15707  isLoadOp = false;
15708  break;
15709  case Intrinsic::arm_neon_vst2lane:
15710  NewOpc = ARMISD::VST2LN_UPD;
15711  NumVecs = 2;
15712  isLoadOp = false;
15713  isLaneOp = true;
15714  break;
15715  case Intrinsic::arm_neon_vst3lane:
15716  NewOpc = ARMISD::VST3LN_UPD;
15717  NumVecs = 3;
15718  isLoadOp = false;
15719  isLaneOp = true;
15720  break;
15721  case Intrinsic::arm_neon_vst4lane:
15722  NewOpc = ARMISD::VST4LN_UPD;
15723  NumVecs = 4;
15724  isLoadOp = false;
15725  isLaneOp = true;
15726  break;
15727  case Intrinsic::arm_neon_vst1x2:
15728  NewOpc = ARMISD::VST1x2_UPD;
15729  NumVecs = 2;
15730  isLoadOp = false;
15731  hasAlignment = false;
15732  break;
15733  case Intrinsic::arm_neon_vst1x3:
15734  NewOpc = ARMISD::VST1x3_UPD;
15735  NumVecs = 3;
15736  isLoadOp = false;
15737  hasAlignment = false;
15738  break;
15739  case Intrinsic::arm_neon_vst1x4:
15740  NewOpc = ARMISD::VST1x4_UPD;
15741  NumVecs = 4;
15742  isLoadOp = false;
15743  hasAlignment = false;
15744  break;
15745  }
15746  } else {
15747  isLaneOp = true;
15748  switch (N->getOpcode()) {
15749  default:
15750  llvm_unreachable("unexpected opcode for Neon base update");
15751  case ARMISD::VLD1DUP:
15752  NewOpc = ARMISD::VLD1DUP_UPD;
15753  NumVecs = 1;
15754  break;
15755  case ARMISD::VLD2DUP:
15756  NewOpc = ARMISD::VLD2DUP_UPD;
15757  NumVecs = 2;
15758  break;
15759  case ARMISD::VLD3DUP:
15760  NewOpc = ARMISD::VLD3DUP_UPD;
15761  NumVecs = 3;
15762  break;
15763  case ARMISD::VLD4DUP:
15764  NewOpc = ARMISD::VLD4DUP_UPD;
15765  NumVecs = 4;
15766  break;
15767  case ISD::LOAD:
15768  NewOpc = ARMISD::VLD1_UPD;
15769  NumVecs = 1;
15770  isLaneOp = false;
15771  break;
15772  case ISD::STORE:
15773  NewOpc = ARMISD::VST1_UPD;
15774  NumVecs = 1;
15775  isLaneOp = false;
15776  isLoadOp = false;
15777  break;
15778  }
15779  }
15780 
15781  // Find the size of memory referenced by the load/store.
15782  EVT VecTy;
15783  if (isLoadOp) {
15784  VecTy = N->getValueType(0);
15785  } else if (Target.isIntrinsic) {
15786  VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15787  } else {
15788  assert(Target.isStore &&
15789  "Node has to be a load, a store, or an intrinsic!");
15790  VecTy = N->getOperand(1).getValueType();
15791  }
15792 
15793  bool isVLDDUPOp =
15794  NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15795  NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15796 
15797  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15798  if (isLaneOp || isVLDDUPOp)
15799  NumBytes /= VecTy.getVectorNumElements();
15800 
15801  if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15802  // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15803  // separate instructions that make it harder to use a non-constant update.
15804  return false;
15805  }
15806 
15807  if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15808  return false;
15809 
15810  // OK, we found an ADD we can fold into the base update.
15811  // Now, create a _UPD node, taking care of not breaking alignment.
15812 
15813  EVT AlignedVecTy = VecTy;
15814  Align Alignment = MemN->getAlign();
15815 
15816  // If this is a less-than-standard-aligned load/store, change the type to
15817  // match the standard alignment.
15818  // The alignment is overlooked when selecting _UPD variants; and it's
15819  // easier to introduce bitcasts here than fix that.
15820  // There are 3 ways to get to this base-update combine:
15821  // - intrinsics: they are assumed to be properly aligned (to the standard
15822  // alignment of the memory type), so we don't need to do anything.
15823  // - ARMISD::VLDx nodes: they are only generated from the aforementioned
15824  // intrinsics, so, likewise, there's nothing to do.
15825  // - generic load/store instructions: the alignment is specified as an
15826  // explicit operand, rather than implicitly as the standard alignment
 15827  // of the memory type (like the intrinsics). We need to change the
15828  // memory type to match the explicit alignment. That way, we don't
15829  // generate non-standard-aligned ARMISD::VLDx nodes.
15830  if (isa<LSBaseSDNode>(N)) {
15831  if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
15832  MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
15833  assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
15834  assert(!isLaneOp && "Unexpected generic load/store lane.");
15835  unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
15836  AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
15837  }
15838  // Don't set an explicit alignment on regular load/stores that we want
15839  // to transform to VLD/VST 1_UPD nodes.
15840  // This matches the behavior of regular load/stores, which only get an
15841  // explicit alignment if the MMO alignment is larger than the standard
15842  // alignment of the memory type.
15843  // Intrinsics, however, always get an explicit alignment, set to the
15844  // alignment of the MMO.
15845  Alignment = Align(1);
15846  }
15847 
15848  // Create the new updating load/store node.
15849  // First, create an SDVTList for the new updating node's results.
15850  EVT Tys[6];
15851  unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
15852  unsigned n;
15853  for (n = 0; n < NumResultVecs; ++n)
15854  Tys[n] = AlignedVecTy;
15855  Tys[n++] = MVT::i32;
15856  Tys[n] = MVT::Other;
15857  SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
15858 
15859  // Then, gather the new node's operands.
15861  Ops.push_back(N->getOperand(0)); // incoming chain
15862  Ops.push_back(N->getOperand(Target.AddrOpIdx));
15863  Ops.push_back(User.Inc);
15864 
15865  if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
15866  // Try to match the intrinsic's signature
15867  Ops.push_back(StN->getValue());
15868  } else {
15869  // Loads (and of course intrinsics) match the intrinsics' signature,
15870  // so just add all but the alignment operand.
15871  unsigned LastOperand =
15872  hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
15873  for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
15874  Ops.push_back(N->getOperand(i));
15875  }
15876 
15877  // For all node types, the alignment operand is always the last one.
15878  Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
15879 
15880  // If this is a non-standard-aligned STORE, the penultimate operand is the
15881  // stored value. Bitcast it to the aligned type.
15882  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
15883  SDValue &StVal = Ops[Ops.size() - 2];
15884  StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
15885  }
15886 
15887  EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
15888  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
15889  MemN->getMemOperand());
15890 
15891  // Update the uses.
15892  SmallVector<SDValue, 5> NewResults;
15893  for (unsigned i = 0; i < NumResultVecs; ++i)
15894  NewResults.push_back(SDValue(UpdN.getNode(), i));
15895 
 15896  // If this is a non-standard-aligned LOAD, the first result is the loaded
15897  // value. Bitcast it to the expected result type.
15898  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
15899  SDValue &LdVal = NewResults[0];
15900  LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
15901  }
15902 
15903  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
15904  DCI.CombineTo(N, NewResults);
15905  DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
15906 
15907  return true;
15908 }
15909 
 15910 // If (opcode ptr inc) is an ADD-like instruction, return the
 15911 // increment value. Otherwise return 0.
15912 static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
15913  SDValue Inc, const SelectionDAG &DAG) {
15914  ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
15915  if (!CInc)
15916  return 0;
15917 
15918  switch (Opcode) {
15919  case ARMISD::VLD1_UPD:
15920  case ISD::ADD:
15921  return CInc->getZExtValue();
15922  case ISD::OR: {
15923  if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
15924  // (OR ptr inc) is the same as (ADD ptr inc)
15925  return CInc->getZExtValue();
15926  }
15927  return 0;
15928  }
15929  default:
15930  return 0;
15931  }
15932 }
15933 
15935  switch (N->getOpcode()) {
15936  case ISD::ADD:
15937  case ISD::OR: {
15938  if (isa<ConstantSDNode>(N->getOperand(1))) {
15939  *Ptr = N->getOperand(0);
15940  *CInc = N->getOperand(1);
15941  return true;
15942  }
15943  return false;
15944  }
15945  case ARMISD::VLD1_UPD: {
15946  if (isa<ConstantSDNode>(N->getOperand(2))) {
15947  *Ptr = N->getOperand(1);
15948  *CInc = N->getOperand(2);
15949  return true;
15950  }
15951  return false;
15952  }
15953  default:
15954  return false;
15955  }
15956 }
15957 
15959  // Check that the add is independent of the load/store.
15960  // Otherwise, folding it would create a cycle. Search through Addr
15961  // as well, since the User may not be a direct user of Addr and
15962  // only share a base pointer.
15965  Worklist.push_back(N);
15966  Worklist.push_back(User);
15967  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
15968  SDNode::hasPredecessorHelper(User, Visited, Worklist))
15969  return false;
15970  return true;
15971 }
15972 
15973 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
15974 /// NEON load/store intrinsics, and generic vector load/stores, to merge
15975 /// base address updates.
15976 /// For generic load/stores, the memory type is assumed to be a vector.
15977 /// The caller is assumed to have checked legality.
15980  const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
15981  N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
15982  const bool isStore = N->getOpcode() == ISD::STORE;
15983  const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
15984  BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
15985 
15986  SDValue Addr = N->getOperand(AddrOpIdx);
15987 
15988  SmallVector<BaseUpdateUser, 8> BaseUpdates;
15989 
15990  // Search for a use of the address operand that is an increment.
15991  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
15992  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
15993  SDNode *User = *UI;
15994  if (UI.getUse().getResNo() != Addr.getResNo() ||
15995  User->getNumOperands() != 2)
15996  continue;
15997 
15998  SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
15999  unsigned ConstInc =
16000  getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16001 
16002  if (ConstInc || User->getOpcode() == ISD::ADD)
16003  BaseUpdates.push_back({User, Inc, ConstInc});
16004  }
16005 
16006  // If the address is a constant pointer increment itself, find
16007  // another constant increment that has the same base operand
16008  SDValue Base;
16009  SDValue CInc;
16010  if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16011  unsigned Offset =
16012  getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16013  for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16014  UI != UE; ++UI) {
16015 
16016  SDNode *User = *UI;
16017  if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
16018  User->getNumOperands() != 2)
16019  continue;
16020 
16021  SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16022  unsigned UserOffset =
16023  getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16024 
16025  if (!UserOffset || UserOffset <= Offset)
16026  continue;
16027 
16028  unsigned NewConstInc = UserOffset - Offset;
16029  SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16030  BaseUpdates.push_back({User, NewInc, NewConstInc});
16031  }
16032  }
16033 
16034  // Try to fold the load/store with an update that matches memory
16035  // access size. This should work well for sequential loads.
16036  //
16037  // Filter out invalid updates as well.
16038  unsigned NumValidUpd = BaseUpdates.size();
16039  for (unsigned I = 0; I < NumValidUpd;) {
16040  BaseUpdateUser &User = BaseUpdates[I];
16041  if (!isValidBaseUpdate(N, User.N)) {
16042  --NumValidUpd;
16043  std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16044  continue;
16045  }
16046 
16047  if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16048  return SDValue();
16049  ++I;
16050  }
16051  BaseUpdates.resize(NumValidUpd);
16052 
16053  // Try to fold with other users. Non-constant updates are considered
16054  // first, and constant updates are sorted to not break a sequence of
16055  // strided accesses (if there is any).
16056  std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16057  [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16058  return LHS.ConstInc < RHS.ConstInc;
16059  });
16060  for (BaseUpdateUser &User : BaseUpdates) {
16061  if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16062  return SDValue();
16063  }
16064  return SDValue();
16065 }
16066 
16069  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16070  return SDValue();
16071 
16072  return CombineBaseUpdate(N, DCI);
16073 }
16074 
16077  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16078  return SDValue();
16079 
16080  SelectionDAG &DAG = DCI.DAG;
16081  SDValue Addr = N->getOperand(2);
16082  MemSDNode *MemN = cast<MemSDNode>(N);
16083  SDLoc dl(N);
16084 
16085  // For the stores, where there are multiple intrinsics we only actually want
16086  // to post-inc the last of the them.
16087  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
16088  if (IntNo == Intrinsic::arm_mve_vst2q &&
16089  cast<ConstantSDNode>(N->getOperand(5))->getZExtValue() != 1)
16090  return SDValue();
16091  if (IntNo == Intrinsic::arm_mve_vst4q &&
16092  cast<ConstantSDNode>(N->getOperand(7))->getZExtValue() != 3)
16093  return SDValue();
16094 
16095  // Search for a use of the address operand that is an increment.
16096  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16097  UE = Addr.getNode()->use_end();
16098  UI != UE; ++UI) {
16099  SDNode *User = *UI;
16100  if (User->getOpcode() != ISD::ADD ||
16101  UI.getUse().getResNo() != Addr.getResNo())
16102  continue;
16103 
16104  // Check that the add is independent of the load/store. Otherwise, folding
16105  // it would create a cycle. We can avoid searching through Addr as it's a
16106  // predecessor to both.
16109  Visited.insert(Addr.getNode());
16110  Worklist.push_back(N);
16111  Worklist.push_back(User);
16112  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16113  SDNode::hasPredecessorHelper(User, Visited, Worklist))
16114  continue;
16115 
16116  // Find the new opcode for the updating load/store.
16117  bool isLoadOp = true;
16118  unsigned NewOpc = 0;
16119  unsigned NumVecs = 0;
16120  switch (IntNo) {
16121  default:
16122  llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16123  case Intrinsic::arm_mve_vld2q:
16124  NewOpc = ARMISD::VLD2_UPD;
16125  NumVecs = 2;
16126  break;
16127  case Intrinsic::arm_mve_vld4q:
16128  NewOpc = ARMISD::VLD4_UPD;
16129  NumVecs = 4;
16130  break;
16131  case Intrinsic::arm_mve_vst2q:
16132  NewOpc = ARMISD::VST2_UPD;
16133  NumVecs = 2;
16134  isLoadOp = false;
16135  break;
16136  case Intrinsic::arm_mve_vst4q:
16137  NewOpc = ARMISD::VST4_UPD;
16138  NumVecs = 4;
16139  isLoadOp = false;
16140  break;
16141  }
16142 
16143  // Find the size of memory referenced by the load/store.
16144  EVT VecTy;
16145  if (isLoadOp) {
16146  VecTy = N->getValueType(0);
16147  } else {
16148  VecTy = N->getOperand(3).getValueType();
16149  }
16150 
16151  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16152 
16153  // If the increment is a constant, it must match the memory ref size.
16154  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16155  ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16156  if (!CInc || CInc->getZExtValue() != NumBytes)
16157  continue;
16158 
16159  // Create the new updating load/store node.
16160  // First, create an SDVTList for the new updating node's results.
16161  EVT Tys[6];
16162  unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16163  unsigned n;
16164  for (n = 0; n < NumResultVecs; ++n)
16165  Tys[n] = VecTy;
16166  Tys[n++] = MVT::i32;
16167  Tys[n] = MVT::Other;
16168  SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16169 
16170  // Then, gather the new node's operands.
16172  Ops.push_back(N->getOperand(0)); // incoming chain
16173  Ops.push_back(N->getOperand(2)); // ptr
16174  Ops.push_back(Inc);
16175 
16176  for (unsigned i = 3; i < N->getNumOperands(); ++i)
16177  Ops.push_back(N->getOperand(i));
16178 
16179  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16180  MemN->getMemOperand());
16181 
16182  // Update the uses.
16183  SmallVector<SDValue, 5> NewResults;
16184  for (unsigned i = 0; i < NumResultVecs; ++i)
16185  NewResults.push_back(SDValue(UpdN.getNode(), i));
16186 
16187  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16188  DCI.CombineTo(N, NewResults);
16189  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16190 
16191  break;
16192  }
16193 
16194  return SDValue();
16195 }
16196 
16197 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16198 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16199 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16200 /// return true.
16202  SelectionDAG &DAG = DCI.DAG;
16203  EVT VT = N->getValueType(0);
16204  // vldN-dup instructions only support 64-bit vectors for N > 1.
16205  if (!VT.is64BitVector())
16206  return false;
16207 
16208  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16209  SDNode *VLD = N->getOperand(0).getNode();
16210  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16211  return false;
16212  unsigned NumVecs = 0;
16213  unsigned NewOpc = 0;
16214  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
16215  if (IntNo == Intrinsic::arm_neon_vld2lane) {
16216  NumVecs = 2;
16217  NewOpc = ARMISD::VLD2DUP;
16218  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16219  NumVecs = 3;
16220  NewOpc = ARMISD::VLD3DUP;
16221  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16222  NumVecs = 4;
16223  NewOpc = ARMISD::VLD4DUP;
16224  } else {
16225  return false;
16226  }
16227 
16228  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16229  // numbers match the load.
16230  unsigned VLDLaneNo =
16231  cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
16232  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16233  UI != UE; ++UI) {
16234  // Ignore uses of the chain result.
16235  if (UI.getUse().getResNo() == NumVecs)
16236  continue;
16237  SDNode *User = *UI;
16238  if (User->getOpcode() != ARMISD::VDUPLANE ||
16239  VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
16240  return false;
16241  }
16242 
16243  // Create the vldN-dup node.
16244  EVT Tys[5];
16245  unsigned n;
16246  for (n = 0; n < NumVecs; ++n)
16247  Tys[n] = VT;
16248  Tys[n] = MVT::Other;
16249  SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16250  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16251  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16252  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16253  Ops, VLDMemInt->getMemoryVT(),
16254  VLDMemInt->getMemOperand());
16255 
16256  // Update the uses.
16257  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16258  UI != UE; ++UI) {
16259  unsigned ResNo = UI.getUse().getResNo();
16260  // Ignore uses of the chain result.
16261  if (ResNo == NumVecs)
16262  continue;
16263  SDNode *User = *UI;
16264  DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16265  }
16266 
16267  // Now the vldN-lane intrinsic is dead except for its chain result.
16268  // Update uses of the chain.
16269  std::vector<SDValue> VLDDupResults;
16270  for (unsigned n = 0; n < NumVecs; ++n)
16271  VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16272  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16273  DCI.CombineTo(VLD, VLDDupResults);
16274 
16275  return true;
16276 }
16277 
16278 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
16279 /// ARMISD::VDUPLANE.
16282  const ARMSubtarget *Subtarget) {
16283  SDValue Op = N->getOperand(0);
16284  EVT VT = N->getValueType(0);
16285 
16286  // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16287  if (Subtarget->hasMVEIntegerOps()) {
16288  EVT ExtractVT = VT.getVectorElementType();
16289  // We need to ensure we are creating a legal type.
16290  if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16291  ExtractVT = MVT::i32;
16292  SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16293  N->getOperand(0), N->getOperand(1));
16294  return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16295  }
16296 
16297  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16298  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16299  if (CombineVLDDUP(N, DCI))
16300  return SDValue(N, 0);
16301 
16302  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16303  // redundant. Ignore bit_converts for now; element sizes are checked below.
16304  while (Op.getOpcode() == ISD::BITCAST)
16305  Op = Op.getOperand(0);
16306  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16307  return SDValue();
16308 
16309  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16310  unsigned EltSize = Op.getScalarValueSizeInBits();
16311  // The canonical VMOV for a zero vector uses a 32-bit element size.
16312  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16313  unsigned EltBits;
16314  if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16315  EltSize = 8;
16316  if (EltSize > VT.getScalarSizeInBits())
16317  return SDValue();
16318 
16319  return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16320 }
16321 
16322 /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16324  const ARMSubtarget *Subtarget) {
16325  SDValue Op = N->getOperand(0);
16326  SDLoc dl(N);
16327 
16328  if (Subtarget->hasMVEIntegerOps()) {
16329  // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16330  // need to come from a GPR.
16331  if (Op.getValueType() == MVT::f32)
16332  return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16333  DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16334  else if (Op.getValueType() == MVT::f16)
16335  return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16336  DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16337  }
16338 
16339  if (!Subtarget->hasNEON())
16340  return SDValue();
16341 
16342  // Match VDUP(LOAD) -> VLD1DUP.
16343  // We match this pattern here rather than waiting for isel because the
16344  // transform is only legal for unindexed loads.
16345  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16346  if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16347  LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16348  SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16349  DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16350  SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16351  SDValue VLDDup =
16352  DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16353  LD->getMemoryVT(), LD->getMemOperand());
16354  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16355  return VLDDup;
16356  }
16357 
16358  return SDValue();
16359 }
16360 
16363  const ARMSubtarget *Subtarget) {
16364  EVT VT = N->getValueType(0);
16365 
16366  // If this is a legal vector load, try to combine it into a VLD1_UPD.
16367  if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16369  return CombineBaseUpdate(N, DCI);
16370 
16371  return SDValue();
16372 }
16373 
16374 // Optimize trunc store (of multiple scalars) to shuffle and store. First,
16375 // pack all of the elements in one place. Next, store to memory in fewer
16376 // chunks.
16378  SelectionDAG &DAG) {
16379  SDValue StVal = St->getValue();
16380  EVT VT = StVal.getValueType();
16381  if (!St->isTruncatingStore() || !VT.isVector())
16382  return SDValue();
16383  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16384  EVT StVT = St->getMemoryVT();
16385  unsigned NumElems = VT.getVectorNumElements();
16386  assert(StVT != VT && "Cannot truncate to the same type");
16387  unsigned FromEltSz = VT.getScalarSizeInBits();
16388  unsigned ToEltSz = StVT.getScalarSizeInBits();
16389 
16390  // From, To sizes and ElemCount must be pow of two
16391  if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16392  return SDValue();
16393 
16394  // We are going to use the original vector elt for storing.
16395  // Accumulated smaller vector elements must be a multiple of the store size.
16396  if (0 != (NumElems * FromEltSz) % ToEltSz)
16397  return SDValue();
16398 
16399  unsigned SizeRatio = FromEltSz / ToEltSz;
16400  assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16401 
16402  // Create a type on which we perform the shuffle.
16403  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16404  NumElems * SizeRatio);
16405  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16406 
16407  SDLoc DL(St);
16408  SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16409  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16410  for (unsigned i = 0; i < NumElems; ++i)
16411  ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16412  : i * SizeRatio;
16413 
16414  // Can't shuffle using an illegal type.
16415  if (!TLI.isTypeLegal(WideVecVT))
16416  return SDValue();
16417 
16418  SDValue Shuff = DAG.getVectorShuffle(
16419  WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16420  // At this point all of the data is stored at the bottom of the
16421  // register. We now need to save it to mem.
16422 
16423  // Find the largest store unit
16424  MVT StoreType = MVT::i8;
16425  for (MVT Tp : MVT::integer_valuetypes()) {
16426  if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16427  StoreType = Tp;
16428  }
16429  // Didn't find a legal store type.
16430  if (!TLI.isTypeLegal(StoreType))
16431  return SDValue();
16432 
16433  // Bitcast the original vector into a vector of store-size units
16434  EVT StoreVecVT =
16435  EVT::getVectorVT(*DAG.getContext(), StoreType,
16436  VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16437  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16438  SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16439  SmallVector<SDValue, 8> Chains;
16440  SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16441  TLI.getPointerTy(DAG.getDataLayout()));
16442  SDValue BasePtr = St->getBasePtr();
16443 
16444  // Perform one or more big stores into memory.
16445  unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16446  for (unsigned I = 0; I < E; I++) {
16447  SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16448  ShuffWide, DAG.getIntPtrConstant(I, DL));
16449  SDValue Ch =
16450  DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16451  St->getAlign(), St->getMemOperand()->getFlags());
16452  BasePtr =
16453  DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16454  Chains.push_back(Ch);
16455  }
16456  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16457 }
16458 
16459 // Try taking a single vector store from an fpround (which would otherwise turn
16460 // into an expensive buildvector) and splitting it into a series of narrowing
16461 // stores.
16463  SelectionDAG &DAG) {
16464  if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16465  return SDValue();
16466  SDValue Trunc = St->getValue();
16467  if (Trunc->getOpcode() != ISD::FP_ROUND)
16468  return SDValue();
16469  EVT FromVT = Trunc->getOperand(0).getValueType();
16470  EVT ToVT = Trunc.getValueType();
16471  if (!ToVT.isVector())
16472  return SDValue();
16473  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
16474  EVT ToEltVT = ToVT.getVectorElementType();
16475  EVT FromEltVT = FromVT.getVectorElementType();
16476 
16477  if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16478  return SDValue();
16479 
16480  unsigned NumElements = 4;
16481  if (FromVT.getVectorNumElements() % NumElements != 0)
16482  return SDValue();
16483 
16484  // Test if the Trunc will be convertable to a VMOVN with a shuffle, and if so
16485  // use the VMOVN over splitting the store. We are looking for patterns of:
16486  // !rev: 0 N 1 N+1 2 N+2 ...
16487  // rev: N 0 N+1 1 N+2 2 ...
16488  // The shuffle may either be a single source (in which case N = NumElts/2) or
16489  // two inputs extended with concat to the same size (in which case N =
16490  // NumElts).
16491  auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16492  ArrayRef<int> M = SVN->getMask();
16493  unsigned NumElts = ToVT.getVectorNumElements();
16494  if (SVN->getOperand(1).isUndef())
16495  NumElts /= 2;
16496 
16497  unsigned Off0 = Rev ? NumElts : 0;
16498  unsigned Off1 = Rev ? 0 : NumElts;
16499 
16500  for (unsigned I = 0; I < NumElts; I += 2) {
16501  if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16502  return false;
16503  if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16504  return false;
16505  }
16506 
16507  return true;
16508  };
16509 
16510  if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16511  if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16512  return SDValue();
16513 
16514  LLVMContext &C = *DAG.getContext();
16515  SDLoc DL(St);
16516  // Details about the old store
16517  SDValue Ch = St->getChain();
16518  SDValue BasePtr = St->getBasePtr();
16519  Align Alignment = St->getOriginalAlign();
16520  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16521  AAMDNodes AAInfo = St->getAAInfo();
16522 
16523  // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16524  // and then stored as truncating integer stores.
16525  EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16526  EVT NewToVT = EVT::getVectorVT(
16527  C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16528 
16529  SmallVector<SDValue, 4> Stores;
16530  for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16531  unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16532  SDValue NewPtr =
16533  DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
16534 
16535  SDValue Extract =
16536  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16537  DAG.getConstant(i * NumElements, DL, MVT::i32));
16538 
16539  SDValue FPTrunc =
16541  Extract, DAG.getConstant(0, DL, MVT::i32));
16542  Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16543 
16544  SDValue Store = DAG.getTruncStore(
16545  Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16546  NewToVT, Alignment, MMOFlags, AAInfo);
16547  Stores.push_back(Store);
16548  }
16549  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16550 }
16551 
16552 // Try taking a single vector store from an MVETRUNC (which would otherwise turn
16553 // into an expensive buildvector) and splitting it into a series of narrowing
16554 // stores.
16556  SelectionDAG &DAG) {
16557  if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16558  return SDValue();
16559  SDValue Trunc = St->getValue();
16560  if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16561  return SDValue();
16562  EVT FromVT = Trunc->getOperand(0).getValueType();
16563  EVT ToVT = Trunc.getValueType();
16564 
16565  LLVMContext &C = *DAG.getContext();
16566  SDLoc DL(St);
16567  // Details about the old store
16568  SDValue Ch = St->getChain();
16569  SDValue BasePtr = St->getBasePtr();
16570  Align Alignment = St->getOriginalAlign();
16571  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16572  AAMDNodes AAInfo = St->getAAInfo();
16573 
16574  EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16575  FromVT.getVectorNumElements());
16576 
16577  SmallVector<SDValue, 4> Stores;
16578  for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16579  unsigned NewOffset =
16580  i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16581  SDValue NewPtr =
16582  DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
16583 
16584  SDValue Extract = Trunc.getOperand(i);
16585  SDValue Store = DAG.getTruncStore(
16586  Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16587  NewToVT, Alignment, MMOFlags, AAInfo);
16588  Stores.push_back(Store);
16589  }
16590  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16591 }
16592 
16593 // Given a floating point store from an extracted vector, with an integer
16594 // VGETLANE that already exists, store the existing VGETLANEu directly. This can
16595 // help reduce fp register pressure, doesn't require the fp extract and allows
16596 // use of more integer post-inc stores not available with vstr.
16598  if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16599  return SDValue();
16600  SDValue Extract = St->getValue();
16601  EVT VT = Extract.getValueType();
16602  // For now only uses f16. This may be useful for f32 too, but that will
16603  // be bitcast(extract), not the VGETLANEu we currently check here.
16604  if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16605  return SDValue();
16606 
16607  SDNode *GetLane =
16609  {Extract.getOperand(0), Extract.getOperand(1)});
16610  if (!GetLane)
16611  return SDValue();
16612 
16613  LLVMContext &C = *DAG.getContext();
16614  SDLoc DL(St);
16615  // Create a new integer store to replace the existing floating point version.
16616  SDValue Ch = St->getChain();
16617  SDValue BasePtr = St->getBasePtr();
16618  Align Alignment = St->getOriginalAlign();
16619  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16620  AAMDNodes AAInfo = St->getAAInfo();
16621  EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16622  SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16623  St->getPointerInfo(), NewToVT, Alignment,
16624  MMOFlags, AAInfo);
16625 
16626  return Store;
16627 }
16628 
16629 /// PerformSTORECombine - Target-specific dag combine xforms for
16630 /// ISD::STORE.
16633  const ARMSubtarget *Subtarget) {
16634  StoreSDNode *St = cast<StoreSDNode>(N);
16635  if (St->isVolatile())
16636  return SDValue();
16637  SDValue StVal = St->getValue();
16638  EVT VT = StVal.getValueType();
16639 
16640  if (Subtarget->hasNEON())
16642  return Store;
16643 
16644  if (Subtarget->hasMVEIntegerOps()) {
16645  if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16646  return NewToken;
16647  if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16648  return NewChain;
16649  if (SDValue NewToken =
16651  return NewToken;
16652  }
16653 
16654  if (!ISD::isNormalStore(St))
16655  return SDValue();
16656 
16657  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16658  // ARM stores of arguments in the same cache line.
16659  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16660  StVal.getNode()->hasOneUse()) {
16661  SelectionDAG &DAG = DCI.DAG;
16662  bool isBigEndian = DAG.getDataLayout().isBigEndian();
16663  SDLoc DL(St);
16664  SDValue BasePtr = St->getBasePtr();
16665  SDValue NewST1 = DAG.getStore(
16666  St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16667  BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16668  St->getMemOperand()->getFlags());
16669 
16670  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16671  DAG.getConstant(4, DL, MVT::i32));
16672  return DAG.getStore(NewST1.getValue(0), DL,
16673  StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16674  OffsetPtr, St->getPointerInfo().getWithOffset(4),
16675  St->getOriginalAlign(),
16676  St->getMemOperand()->getFlags());
16677  }
16678 
16679  if (StVal.getValueType() == MVT::i64 &&
16680  StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16681 
16682  // Bitcast an i64 store extracted from a vector to f64.
16683  // Otherwise, the i64 value will be legalized to a pair of i32 values.
16684  SelectionDAG &DAG = DCI.DAG;
16685  SDLoc dl(StVal);
16686  SDValue IntVec = StVal.getOperand(0);
16687  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16688  IntVec.getValueType().getVectorNumElements());
16689  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16690  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16691  Vec, StVal.getOperand(1));
16692  dl = SDLoc(N);
16693  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16694  // Make the DAGCombiner fold the bitcasts.
16695  DCI.AddToWorklist(Vec.getNode());
16696  DCI.AddToWorklist(ExtElt.getNode());
16697  DCI.AddToWorklist(V.getNode());
16698  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16699  St->getPointerInfo(), St->getAlign(),
16700  St->getMemOperand()->getFlags(), St->getAAInfo());
16701  }
16702 
16703  // If this is a legal vector store, try to combine it into a VST1_UPD.
16704  if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16706  return CombineBaseUpdate(N, DCI);
16707 
16708  return SDValue();
16709 }
16710 
16711 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16712 /// can replace combinations of VMUL and VCVT (floating-point to integer)
16713 /// when the VMUL has a constant operand that is a power of 2.
16714 ///
16715 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16716 /// vmul.f32 d16, d17, d16
16717 /// vcvt.s32.f32 d16, d16
16718 /// becomes:
16719 /// vcvt.s32.f32 d16, d16, #3
16721  const ARMSubtarget *Subtarget) {
16722  if (!Subtarget->hasNEON())
16723  return SDValue();
16724 
16725  SDValue Op = N->getOperand(0);
16726  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16727  Op.getOpcode() != ISD::FMUL)
16728  return SDValue();
16729 
16730  SDValue ConstVec = Op->getOperand(1);
16731  if (!isa<BuildVectorSDNode>(ConstVec))
16732  return SDValue();
16733 
16734  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16735  uint32_t FloatBits = FloatTy.getSizeInBits();
16736  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16737  uint32_t IntBits = IntTy.getSizeInBits();
16738  unsigned NumLanes = Op.getValueType().getVectorNumElements();
16739  if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16740  // These instructions only exist converting from f32 to i32. We can handle
16741  // smaller integers by generating an extra truncate, but larger ones would
16742  // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16743  // these intructions only support v2i32/v4i32 types.
16744  return SDValue();
16745  }
16746 
16747  BitVector UndefElements;
16748  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16749  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16750  if (C == -1 || C == 0 || C > 32)
16751  return SDValue();
16752 
16753  SDLoc dl(N);
16754  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16755  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16756  Intrinsic::arm_neon_vcvtfp2fxu;
16757  SDValue FixConv = DAG.getNode(
16758  ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16759  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16760  DAG.getConstant(C, dl, MVT::i32));
16761 
16762  if (IntBits < FloatBits)
16763  FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16764 
16765  return FixConv;
16766 }
16767 
16769  const ARMSubtarget *Subtarget) {
16770  if (!Subtarget->hasMVEFloatOps())
16771  return SDValue();
16772 
16773  // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16774  // The second form can be more easily turned into a predicated vadd, and
16775  // possibly combined into a fma to become a predicated vfma.
16776  SDValue Op0 = N->getOperand(0);
16777  SDValue Op1 = N->getOperand(1);
16778  EVT VT = N->getValueType(0);
16779  SDLoc DL(N);
16780 
16781  // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
16782  // which these VMOV's represent.
16783  auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16784  if (Op.getOpcode() != ISD::BITCAST ||
16785  Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16786  return false;
16787  uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16788  if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16789  return true;
16790  if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16791  return true;
16792  return false;
16793  };
16794 
16795  if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16796  std::swap(Op0, Op1);
16797 
16798  if (Op1.getOpcode() != ISD::VSELECT)
16799  return SDValue();
16800 
16801  SDNodeFlags FaddFlags = N->getFlags();
16802  bool NSZ = FaddFlags.hasNoSignedZeros();
16803  if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16804  return SDValue();
16805 
16806  SDValue FAdd =
16807  DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16808  return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16809 }
16810 
16811 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
16812 /// can replace combinations of VCVT (integer to floating-point) and VDIV
16813 /// when the VDIV has a constant operand that is a power of 2.
16814 ///
16815 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16816 /// vcvt.f32.s32 d16, d16
16817 /// vdiv.f32 d16, d17, d16
16818 /// becomes:
16819 /// vcvt.f32.s32 d16, d16, #3
16821  const ARMSubtarget *Subtarget) {
16822  if (!Subtarget->hasNEON())
16823  return SDValue();
16824 
16825  SDValue Op = N->getOperand(0);
16826  unsigned OpOpcode = Op.getNode()->getOpcode();
16827  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
16828  (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
16829  return SDValue();
16830 
16831  SDValue ConstVec = N->getOperand(1);
16832  if (!isa<BuildVectorSDNode>(ConstVec))
16833  return SDValue();
16834 
16835  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
16836  uint32_t FloatBits = FloatTy.getSizeInBits();
16837  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
16838  uint32_t IntBits = IntTy.getSizeInBits();
16839  unsigned NumLanes = Op.getValueType().getVectorNumElements();
16840  if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16841  // These instructions only exist converting from i32 to f32. We can handle
16842  // smaller integers by generating an extra extend, but larger ones would
16843  // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16844  // these intructions only support v2i32/v4i32 types.
16845  return SDValue();
16846  }
16847 
16848  BitVector UndefElements;
16849  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16850  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16851  if (C == -1 || C == 0 || C > 32)
16852  return SDValue();
16853 
16854  SDLoc dl(N);
16855  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
16856  SDValue ConvInput = Op.getOperand(0);
16857  if (IntBits < FloatBits)
16858  ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
16859  dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16860  ConvInput);
16861 
16862  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
16863  Intrinsic::arm_neon_vcvtfxu2fp;
16864  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
16865  Op.getValueType(),
16866  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
16867  ConvInput, DAG.getConstant(C, dl, MVT::i32));
16868 }
16869 
16871  const ARMSubtarget *ST) {
16872  if (!ST->hasMVEIntegerOps())
16873  return SDValue();
16874 
16875  assert(N->getOpcode() == ISD::VECREDUCE_ADD);
16876  EVT ResVT = N->getValueType(0);
16877  SDValue N0 = N->getOperand(0);
16878  SDLoc dl(N);
16879 
16880  // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
16881  if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
16882  (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
16883  N0.getValueType() == MVT::v16i8)) {
16884  SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
16885  SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
16886  return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
16887  }
16888 
16889  // We are looking for something that will have illegal types if left alone,
16890  // but that we can convert to a single instruction under MVE. For example
16891  // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
16892  // or
16893  // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
16894 
16895  // The legal cases are:
16896  // VADDV u/s 8/16/32
16897  // VMLAV u/s 8/16/32
16898  // VADDLV u/s 32
16899  // VMLALV u/s 16/32
16900 
16901  // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
16902  // extend it and use v4i32 instead.
16903  auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
16904  EVT AVT = A.getValueType();
16905  return any_of(ExtTypes, [&](MVT Ty) {
16906  return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
16907  AVT.bitsLE(Ty);
16908  });
16909  };
16910  auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
16911  EVT AVT = A.getValueType();
16912  if (!AVT.is128BitVector())
16913  A = DAG.getNode(ExtendCode, dl,
16915  128 / AVT.getVectorMinNumElements())),
16916  A);
16917  return A;
16918  };
16919  auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
16920  if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
16921  return SDValue();
16922  SDValue A = N0->getOperand(0);
16923  if (ExtTypeMatches(A, ExtTypes))
16924  return ExtendIfNeeded(A, ExtendCode);
16925  return SDValue();
16926  };
16927  auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
16928  ArrayRef<MVT> ExtTypes, SDValue &Mask) {
16929  if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
16931  return SDValue();
16932  Mask = N0->getOperand(0);
16933  SDValue Ext = N0->getOperand(1);
16934  if (Ext->getOpcode() != ExtendCode)
16935  return SDValue();
16936  SDValue A = Ext->getOperand(0);
16937  if (ExtTypeMatches(A, ExtTypes))
16938  return ExtendIfNeeded(A, ExtendCode);
16939  return SDValue();
16940  };
16941  auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
16942  SDValue &A, SDValue &B) {
16943  // For a vmla we are trying to match a larger pattern:
16944  // ExtA = sext/zext A
16945  // ExtB = sext/zext B
16946  // Mul = mul ExtA, ExtB
16947  // vecreduce.add Mul
16948  // There might also be en extra extend between the mul and the addreduce, so
16949  // long as the bitwidth is high enough to make them equivalent (for example
16950  // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
16951  if (ResVT != RetTy)
16952  return false;
16953  SDValue Mul = N0;
16954  if (Mul->getOpcode() == ExtendCode &&
16955  Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
16956  ResVT.getScalarSizeInBits())
16957  Mul = Mul->getOperand(0);
16958  if (Mul->getOpcode() != ISD::MUL)
16959  return false;
16960  SDValue ExtA = Mul->getOperand(0);
16961  SDValue ExtB = Mul->getOperand(1);
16962  if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
16963  return false;
16964  A = ExtA->getOperand(0);
16965  B = ExtB->getOperand(0);
16966  if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
16967  A = ExtendIfNeeded(A, ExtendCode);
16968  B = ExtendIfNeeded(B, ExtendCode);
16969  return true;
16970  }
16971  return false;
16972  };
16973  auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
16974  SDValue &A, SDValue &B, SDValue &Mask) {
16975  // Same as the pattern above with a select for the zero predicated lanes
16976  // ExtA = sext/zext A
16977  // ExtB = sext/zext B
16978  // Mul = mul ExtA, ExtB
16979  // N0 = select Mask, Mul, 0
16980  // vecreduce.add N0
16981  if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
16983  return false;
16984  Mask = N0->getOperand(0);
16985  SDValue Mul = N0->getOperand(1);
16986  if (Mul->getOpcode() == ExtendCode &&
16987  Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
16988  ResVT.getScalarSizeInBits())
16989  Mul = Mul->getOperand(0);
16990  if (Mul->getOpcode() != ISD::MUL)
16991  return false;
16992  SDValue ExtA = Mul->getOperand(0);
16993  SDValue ExtB = Mul->getOperand(1);
16994  if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
16995  return false;
16996  A = ExtA->getOperand(0);
16997  B = ExtB->getOperand(0);
16998  if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
16999  A = ExtendIfNeeded(A, ExtendCode);
17000  B = ExtendIfNeeded(B, ExtendCode);
17001  return true;
17002  }
17003  return false;
17004  };
17005  auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17006  // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17007  // reductions. The operands are extended with MVEEXT, but as they are
17008  // reductions the lane orders do not matter. MVEEXT may be combined with
17009  // loads to produce two extending loads, or else they will be expanded to
17010  // VREV/VMOVL.
17011  EVT VT = Ops[0].getValueType();
17012  if (VT == MVT::v16i8) {
17013  assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17014  "Unexpected illegal long reduction opcode");
17015  bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17016 
17017  SDValue Ext0 =
17018  DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17019  DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17020  SDValue Ext1 =
17021  DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17022  DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17023 
17024  SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17025  Ext0, Ext1);
17026  SDValue MLA1 =
17027  DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17028  DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17029  Ext0.getValue(1), Ext1.getValue(1));
17030  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17031  }
17032  SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17033  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17034  SDValue(Node.getNode(), 1));
17035  };
17036 
17037  SDValue A, B;
17038  SDValue Mask;
17039  if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17040  return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17041  if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17042  return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17044  A, B))
17045  return Create64bitNode(ARMISD::VMLALVs, {A, B});
17047  A, B))
17048  return Create64bitNode(ARMISD::VMLALVu, {A, B});
17049  if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17050  return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17051  DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17052  if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17053  return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17054  DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17055 
17056  if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17057  Mask))
17058  return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17059  if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17060  Mask))
17061  return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17062  if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17063  Mask))
17064  return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17065  if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17066  Mask))
17067  return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17068  if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17069  return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17070  DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17071  if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17072  return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17073  DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17074 
17075  if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17076  return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17077  if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17078  return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17079  if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17080  return Create64bitNode(ARMISD::VADDLVs, {A});
17081  if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17082  return Create64bitNode(ARMISD::VADDLVu, {A});
17083  if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17084  return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17085  DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17086  if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17087  return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17088  DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17089 
17090  if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17091  return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17092  if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17093  return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17094  if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17095  return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17096  if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17097  return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17098  if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17099  return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17100  DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17101  if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17102  return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17103  DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17104 
17105  // Some complications. We can get a case where the two inputs of the mul are
17106  // the same, then the output sext will have been helpfully converted to a
17107  // zext. Turn it back.
17108  SDValue Op = N0;
17109  if (Op->getOpcode() == ISD::VSELECT)
17110  Op = Op->getOperand(1);
17111  if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17112  Op->getOperand(0)->getOpcode() == ISD::MUL) {
17113  SDValue Mul = Op->getOperand(0);
17114  if (Mul->getOperand(0) == Mul->getOperand(1) &&
17115  Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17116  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17117  if (Op != N0)
17118  Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17119  N0->getOperand(0), Ext, N0->getOperand(2));
17120  return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17121  }
17122  }
17123 
17124  return SDValue();
17125 }
17126 
17127 // Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17128 // the lanes are used. Due to the reduction being commutative the shuffle can be
17129 // removed.
17131  unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17132  auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17133  if (!Shuf || !Shuf->getOperand(1).isUndef())
17134  return SDValue();
17135 
17136  // Check all elements are used once in the mask.
17137  ArrayRef<int> Mask = Shuf->getMask();
17138  APInt SetElts(Mask.size(), 0);
17139  for (int E : Mask) {
17140  if (E < 0 || E >= (int)Mask.size())
17141  return SDValue();
17142  SetElts.setBit(E);
17143  }
17144  if (!SetElts.isAllOnes())
17145  return SDValue();
17146 
17147  if (N->getNumOperands() != VecOp + 1) {
17148  auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17149  if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17150  return SDValue();
17151  }
17152 
17154  for (SDValue Op : N->ops()) {
17155  if (Op.getValueType().isVector())
17156  Ops.push_back(Op.getOperand(0));
17157  else
17158  Ops.push_back(Op);
17159  }
17160  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17161 }
17162 
17165  SDValue Op0 = N->getOperand(0);
17166  SDValue Op1 = N->getOperand(1);
17167  unsigned IsTop = N->getConstantOperandVal(2);
17168 
17169  // VMOVNT a undef -> a
17170  // VMOVNB a undef -> a
17171  // VMOVNB undef a -> a
17172  if (Op1->isUndef())
17173  return Op0;
17174  if (Op0->isUndef() && !IsTop)
17175  return Op1;
17176 
17177  // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17178  // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17179  if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17180  Op1->getOpcode() == ARMISD::VQMOVNu) &&
17181  Op1->getConstantOperandVal(2) == 0)
17182  return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17183  Op0, Op1->getOperand(1), N->getOperand(2));
17184 
17185  // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17186  // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17187  // into the top or bottom lanes.
17188  unsigned NumElts = N->getValueType(0).getVectorNumElements();
17189  APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17190  APInt Op0DemandedElts =
17191  IsTop ? Op1DemandedElts
17192  : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17193 
17194  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17195  if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17196  return SDValue(N, 0);
17197  if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17198  return SDValue(N, 0);
17199 
17200  return SDValue();
17201 }
17202 
17205  SDValue Op0 = N->getOperand(0);
17206  unsigned IsTop = N->getConstantOperandVal(2);
17207 
17208  unsigned NumElts = N->getValueType(0).getVectorNumElements();
17209  APInt Op0DemandedElts =
17210  APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17211  : APInt::getHighBitsSet(2, 1));
17212 
17213  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17214  if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17215  return SDValue(N, 0);
17216  return SDValue();
17217 }
17218 
17221  EVT VT = N->getValueType(0);
17222  SDValue LHS = N->getOperand(0);
17223  SDValue RHS = N->getOperand(1);
17224 
17225  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17226  auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17227  // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17228  if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17229  LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17230  (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17231  SDLoc DL(N);
17232  SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17233  LHS.getOperand(0), RHS.getOperand(0));
17234  SDValue UndefV = LHS.getOperand(1);
17235  return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17236  }
17237  return SDValue();
17238 }
17239 
17241  SDLoc DL(N);
17242  SDValue Op0 = N->getOperand(0);
17243  SDValue Op1 = N->getOperand(1);
17244 
17245  // Turn X << -C -> X >> C and viceversa. The negative shifts can come up from
17246  // uses of the intrinsics.
17247  if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17248  int ShiftAmt = C->getSExtValue();
17249  if (ShiftAmt == 0) {
17250  SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17251  DAG.ReplaceAllUsesWith(N, Merge.getNode());
17252  return SDValue();
17253  }
17254 
17255  if (ShiftAmt >= -32 && ShiftAmt < 0) {
17256  unsigned NewOpcode =
17257  N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17258  SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17259  DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17260  DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17261  return NewShift;
17262  }
17263  }
17264 
17265  return SDValue();
17266 }
17267 
17268 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
/// Folds NEON vector-shift intrinsics whose shift amount is a constant
/// build_vector into the equivalent ARMISD immediate-shift nodes, and
/// performs demanded-bits narrowing / direct lowering for several MVE
/// intrinsics. Returns the replacement value, or SDValue() if nothing folds.
/// NOTE(review): the first line of the signature (original line 17269,
/// through the "SDNode *N" parameter) appears to have been dropped by the
/// listing extraction -- confirm against the upstream source.
17270  DAGCombinerInfo &DCI) const {
17271  SelectionDAG &DAG = DCI.DAG;
 // For INTRINSIC_WO_CHAIN, operand 0 holds the intrinsic ID.
17272  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
17273  switch (IntNo) {
17274  default:
17275  // Don't do anything for most intrinsics.
17276  break;
17277 
17278  // Vector shifts: check for immediate versions and lower them.
17279  // Note: This is done during DAG combining instead of DAG legalizing because
17280  // the build_vectors for 64-bit vector element shift counts are generally
17281  // not legal, and it is hard to see their values after they get legalized to
17282  // loads from a constant pool.
17283  case Intrinsic::arm_neon_vshifts:
17284  case Intrinsic::arm_neon_vshiftu:
17285  case Intrinsic::arm_neon_vrshifts:
17286  case Intrinsic::arm_neon_vrshiftu:
17287  case Intrinsic::arm_neon_vrshiftn:
17288  case Intrinsic::arm_neon_vqshifts:
17289  case Intrinsic::arm_neon_vqshiftu:
17290  case Intrinsic::arm_neon_vqshiftsu:
17291  case Intrinsic::arm_neon_vqshiftns:
17292  case Intrinsic::arm_neon_vqshiftnu:
17293  case Intrinsic::arm_neon_vqshiftnsu:
17294  case Intrinsic::arm_neon_vqrshiftns:
17295  case Intrinsic::arm_neon_vqrshiftnu:
17296  case Intrinsic::arm_neon_vqrshiftnsu: {
17297  EVT VT = N->getOperand(1).getValueType();
17298  int64_t Cnt;
17299  unsigned VShiftOpc = 0;
17300 
 // First pass: verify operand 2 is a legal immediate for this shift kind
 // (left vs. right; narrowing shifts need a right-shift immediate).
17301  switch (IntNo) {
17302  case Intrinsic::arm_neon_vshifts:
17303  case Intrinsic::arm_neon_vshiftu:
17304  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17305  VShiftOpc = ARMISD::VSHLIMM;
17306  break;
17307  }
17308  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17309  VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17310  : ARMISD::VSHRuIMM);
17311  break;
17312  }
17313  return SDValue();
17314 
17315  case Intrinsic::arm_neon_vrshifts:
17316  case Intrinsic::arm_neon_vrshiftu:
17317  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17318  break;
17319  return SDValue();
17320 
17321  case Intrinsic::arm_neon_vqshifts:
17322  case Intrinsic::arm_neon_vqshiftu:
17323  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17324  break;
17325  return SDValue();
17326 
17327  case Intrinsic::arm_neon_vqshiftsu:
 // vqshlu requires an immediate; anything else is a front-end error.
17328  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17329  break;
17330  llvm_unreachable("invalid shift count for vqshlu intrinsic");
17331 
17332  case Intrinsic::arm_neon_vrshiftn:
17333  case Intrinsic::arm_neon_vqshiftns:
17334  case Intrinsic::arm_neon_vqshiftnu:
17335  case Intrinsic::arm_neon_vqshiftnsu:
17336  case Intrinsic::arm_neon_vqrshiftns:
17337  case Intrinsic::arm_neon_vqrshiftnu:
17338  case Intrinsic::arm_neon_vqrshiftnsu:
17339  // Narrowing shifts require an immediate right shift.
17340  if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17341  break;
17342  llvm_unreachable("invalid shift count for narrowing vector shift "
17343  "intrinsic");
17344 
17345  default:
17346  llvm_unreachable("unhandled vector shift");
17347  }
17348 
 // Second pass: select the ARMISD opcode (vshifts/vshiftu set it above).
17349  switch (IntNo) {
17350  case Intrinsic::arm_neon_vshifts:
17351  case Intrinsic::arm_neon_vshiftu:
17352  // Opcode already set above.
17353  break;
17354  case Intrinsic::arm_neon_vrshifts:
17355  VShiftOpc = ARMISD::VRSHRsIMM;
17356  break;
17357  case Intrinsic::arm_neon_vrshiftu:
17358  VShiftOpc = ARMISD::VRSHRuIMM;
17359  break;
17360  case Intrinsic::arm_neon_vrshiftn:
17361  VShiftOpc = ARMISD::VRSHRNIMM;
17362  break;
17363  case Intrinsic::arm_neon_vqshifts:
17364  VShiftOpc = ARMISD::VQSHLsIMM;
17365  break;
17366  case Intrinsic::arm_neon_vqshiftu:
17367  VShiftOpc = ARMISD::VQSHLuIMM;
17368  break;
17369  case Intrinsic::arm_neon_vqshiftsu:
17370  VShiftOpc = ARMISD::VQSHLsuIMM;
17371  break;
17372  case Intrinsic::arm_neon_vqshiftns:
17373  VShiftOpc = ARMISD::VQSHRNsIMM;
17374  break;
17375  case Intrinsic::arm_neon_vqshiftnu:
17376  VShiftOpc = ARMISD::VQSHRNuIMM;
17377  break;
17378  case Intrinsic::arm_neon_vqshiftnsu:
17379  VShiftOpc = ARMISD::VQSHRNsuIMM;
17380  break;
17381  case Intrinsic::arm_neon_vqrshiftns:
17382  VShiftOpc = ARMISD::VQRSHRNsIMM;
17383  break;
17384  case Intrinsic::arm_neon_vqrshiftnu:
17385  VShiftOpc = ARMISD::VQRSHRNuIMM;
17386  break;
17387  case Intrinsic::arm_neon_vqrshiftnsu:
17388  VShiftOpc = ARMISD::VQRSHRNsuIMM;
17389  break;
17390  }
17391 
17392  SDLoc dl(N);
 // Replace the intrinsic with (VShiftOpc vec, #Cnt).
17393  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17394  N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17395  }
17396 
17397  case Intrinsic::arm_neon_vshiftins: {
17398  EVT VT = N->getOperand(1).getValueType();
17399  int64_t Cnt;
17400  unsigned VShiftOpc = 0;
17401 
 // vsli takes a left-shift immediate, vsri a right-shift immediate.
17402  if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17403  VShiftOpc = ARMISD::VSLIIMM;
17404  else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17405  VShiftOpc = ARMISD::VSRIIMM;
17406  else {
17407  llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17408  }
17409 
17410  SDLoc dl(N);
17411  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17412  N->getOperand(1), N->getOperand(2),
17413  DAG.getConstant(Cnt, dl, MVT::i32));
17414  }
17415 
17416  case Intrinsic::arm_neon_vqrshifts:
17417  case Intrinsic::arm_neon_vqrshiftu:
17418  // No immediate versions of these to check for.
17419  break;
17420 
17421  case Intrinsic::arm_mve_vqdmlah:
17422  case Intrinsic::arm_mve_vqdmlash:
17423  case Intrinsic::arm_mve_vqrdmlah:
17424  case Intrinsic::arm_mve_vqrdmlash:
17425  case Intrinsic::arm_mve_vmla_n_predicated:
17426  case Intrinsic::arm_mve_vmlas_n_predicated:
17427  case Intrinsic::arm_mve_vqdmlah_predicated:
17428  case Intrinsic::arm_mve_vqdmlash_predicated:
17429  case Intrinsic::arm_mve_vqrdmlah_predicated:
17430  case Intrinsic::arm_mve_vqrdmlash_predicated: {
17431  // These intrinsics all take an i32 scalar operand which is narrowed to the
17432  // size of a single lane of the vector type they return. So we don't need
17433  // any bits of that operand above that point, which allows us to eliminate
17434  // uxth/sxth.
17435  unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17436  APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
 // If SimplifyDemandedBits changed anything, the replacement has already
 // been queued through DCI, so return a null SDValue here.
17437  if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17438  return SDValue();
17439  break;
17440  }
17441 
17442  case Intrinsic::arm_mve_minv:
17443  case Intrinsic::arm_mve_maxv:
17444  case Intrinsic::arm_mve_minav:
17445  case Intrinsic::arm_mve_maxav:
17446  case Intrinsic::arm_mve_minv_predicated:
17447  case Intrinsic::arm_mve_maxv_predicated:
17448  case Intrinsic::arm_mve_minav_predicated:
17449  case Intrinsic::arm_mve_maxav_predicated: {
17450  // These intrinsics all take an i32 scalar operand which is narrowed to the
17451  // size of a single lane of the vector type they take as the other input.
17452  unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17453  APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17454  if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17455  return SDValue();
17456  break;
17457  }
17458 
17459  case Intrinsic::arm_mve_addv: {
17460  // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17461  // which allow PerformADDVecReduce to turn it into VADDLV when possible.
17462  bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
17463  unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17464  return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17465  }
17466 
17467  case Intrinsic::arm_mve_addlv:
17468  case Intrinsic::arm_mve_addlv_predicated: {
17469  // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17470  // which recombines the two outputs into an i64
17471  bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
 // NOTE(review): the lines completing this ternary (original lines
 // 17473-17474, selecting between the plain and predicated VADDLV opcodes)
 // and the declaration of Ops (17476) appear to have been dropped by the
 // listing extraction -- confirm against the upstream source.
17472  unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17475 
 // Copy all operands except the unsigned-flag constant at index 2.
17477  for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17478  if (i != 2) // skip the unsigned flag
17479  Ops.push_back(N->getOperand(i));
17480 
17481  SDLoc dl(N);
 // The node produces the result in two i32 halves; glue them into an i64.
17482  SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17483  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17484  val.getValue(1));
17485  }
17486  }
17487 
17488  return SDValue();
17489 }
17490 
17491 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
17492 /// lowers them. As with the vector shift intrinsics, this is done during DAG
17493 /// combining instead of DAG legalizing because the build_vectors for 64-bit
17494 /// vector element shift counts are generally not legal, and it is hard to see
17495 /// their values after they get legalized to loads from a constant pool.
17498  const ARMSubtarget *ST) {
17499  SelectionDAG &DAG = DCI.DAG;
17500  EVT VT = N->getValueType(0);
17501 
17502  if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17503  N->getOperand(0)->getOpcode() == ISD::AND &&
17504  N->getOperand(0)->hasOneUse()) {
17505  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17506  return SDValue();
17507  // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17508  // usually show up because instcombine prefers to canonicalize it to
17509  // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17510  // out of GEP lowering in some cases.
17511  SDValue N0 = N->getOperand(0);
17512  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17513  if (!ShiftAmtNode)
17514  return SDValue();
17515  uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17516  ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17517  if (!AndMaskNode)
17518  return SDValue();
17519  uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17520  // Don't transform uxtb/uxth.
17521  if (AndMask == 255 || AndMask == 65535)
17522  return SDValue();
17523  if (isMask_32(AndMask)) {
17524  uint32_t MaskedBits = llvm::countl_zero(AndMask);
17525  if (MaskedBits > ShiftAmt) {
17526  SDLoc DL(N);
17527  SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17528  DAG.getConstant(MaskedBits, DL, MVT::i32));
17529  return DAG.getNode(
17530  ISD::SRL, DL, MVT::i32, SHL,
17531  DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17532  }
17533  }
17534  }
17535 
17536  // Nothing to be done for scalar shifts.
17537  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17538  if (!VT.isVector() || !TLI.isTypeLegal(VT))
17539  return SDValue();
17540  if (ST->hasMVEIntegerOps())
17541  return SDValue();
17542 
17543  int64_t Cnt;
17544 
17545  switch (N->getOpcode()) {
17546  default: llvm_unreachable("unexpected shift opcode");
17547 
17548  case ISD::SHL:
17549  if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17550  SDLoc dl(N);
17551  return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17552  DAG.getConstant(Cnt, dl, MVT::i32));
17553  }
17554  break;
17555 
17556  case ISD::SRA:
17557  case ISD::SRL:
17558  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17559  unsigned VShiftOpc =
17560  (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17561  SDLoc dl(N);
17562  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17563  DAG.getConstant(Cnt, dl, MVT::i32));
17564  }
17565  }
17566  return SDValue();
17567 }
17568 
17569 // Look for a sign/zero/fpextend extend of a larger than legal load. This can be
17570 // split into multiple extending loads, which are simpler to deal with than an
17571 // arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17572 // to convert the type to an f32.
// NOTE(review): the signature line (original line 17573, presumably taking
// the extend node and the SelectionDAG) was dropped by the listing
// extraction -- confirm against the upstream source.
17574  SDValue N0 = N->getOperand(0);
 // Only handle a simple, unindexed, non-extending load with a single user.
17575  if (N0.getOpcode() != ISD::LOAD)
17576  return SDValue();
17577  LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17578  if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17579  LD->getExtensionType() != ISD::NON_EXTLOAD)
17580  return SDValue();
17581  EVT FromVT = LD->getValueType(0);
17582  EVT ToVT = N->getValueType(0);
17583  if (!ToVT.isVector())
17584  return SDValue();
17585  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
17586  EVT ToEltVT = ToVT.getVectorElementType();
17587  EVT FromEltVT = FromVT.getVectorElementType();
17588 
 // Split into 4-element chunks for the i8->i32 and f16->f32 cases; bail
 // out for any other element combination or non-multiple vector length.
17589  unsigned NumElements = 0;
17590  if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17591  NumElements = 4;
17592  if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17593  NumElements = 4;
17594  if (NumElements == 0 ||
17595  (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17596  FromVT.getVectorNumElements() % NumElements != 0 ||
17597  !isPowerOf2_32(NumElements))
17598  return SDValue();
17599 
17600  LLVMContext &C = *DAG.getContext();
17601  SDLoc DL(LD);
17602  // Details about the old load
17603  SDValue Ch = LD->getChain();
17604  SDValue BasePtr = LD->getBasePtr();
17605  Align Alignment = LD->getOriginalAlign();
17606  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17607  AAMDNodes AAInfo = LD->getAAInfo();
17608 
17609  ISD::LoadExtType NewExtType =
17610  N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17611  SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17612  EVT NewFromVT = EVT::getVectorVT(
17613  C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17614  EVT NewToVT = EVT::getVectorVT(
17615  C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17616 
 // NOTE(review): original line 17617 (the declaration of the Loads vector
 // used below) was dropped by the listing extraction -- confirm upstream.
17618  SmallVector<SDValue, 4> Chains;
 // Emit one extending load per chunk at the appropriate byte offset.
17619  for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17620  unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17621  SDValue NewPtr =
17622  DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
17623 
17624  SDValue NewLoad =
17625  DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17626  LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17627  Alignment, MMOFlags, AAInfo);
17628  Loads.push_back(NewLoad);
17629  Chains.push_back(SDValue(NewLoad.getNode(), 1));
17630  }
17631 
17632  // Float truncs need to extended with VCVTB's into their floating point types.
17633  if (FromEltVT == MVT::f16) {
17634  SmallVector<SDValue, 4> Extends;
17635 
17636  for (unsigned i = 0; i < Loads.size(); i++) {
 // NOTE(review): original line 17638 (the right-hand side of this
 // initializer, a cast of Loads[i]) was dropped by the extraction.
17637  SDValue LoadBC =
17639  SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17640  DAG.getConstant(0, DL, MVT::i32));
17641  Extends.push_back(FPExt);
17642  }
17643 
17644  Loads = Extends;
17645  }
17646 
 // Tie the new load chains together and rewire users of the old chain.
17647  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17648  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17649  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17650 }
17651 
17652 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17653 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
/// NOTE(review): the first signature line (original line 17654) was dropped
/// by the listing extraction -- confirm against the upstream source.
17655  const ARMSubtarget *ST) {
17656  SDValue N0 = N->getOperand(0);
17657 
17658  // Check for sign- and zero-extensions of vector extract operations of 8- and
17659  // 16-bit vector elements. NEON and MVE support these directly. They are
17660  // handled during DAG combining because type legalization will promote them
17661  // to 32-bit types and it is messy to recognize the operations after that.
17662  if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
 // NOTE(review): the second half of this condition (original line 17663,
 // checking N0's opcode) was dropped by the extraction -- confirm upstream.
17664  SDValue Vec = N0.getOperand(0);
17665  SDValue Lane = N0.getOperand(1);
17666  EVT VT = N->getValueType(0);
17667  EVT EltVT = N0.getValueType();
17668  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17669 
17670  if (VT == MVT::i32 &&
17671  (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17672  TLI.isTypeLegal(Vec.getValueType()) &&
17673  isa<ConstantSDNode>(Lane)) {
17674 
 // VGETLANEs/VGETLANEu perform the lane extract and the extension in one
 // node; any_extend is treated as zero-extending.
17675  unsigned Opc = 0;
17676  switch (N->getOpcode()) {
17677  default: llvm_unreachable("unexpected opcode");
17678  case ISD::SIGN_EXTEND:
17679  Opc = ARMISD::VGETLANEs;
17680  break;
17681  case ISD::ZERO_EXTEND:
17682  case ISD::ANY_EXTEND:
17683  Opc = ARMISD::VGETLANEu;
17684  break;
17685  }
17686  return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17687  }
17688  }
17689 
 // MVE can split a wider-than-legal extend of a load into several legal
 // extending loads.
17690  if (ST->hasMVEIntegerOps())
17691  if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17692  return NewLoad;
17693 
17694  return SDValue();
17695 }
17696 
17698  const ARMSubtarget *ST) {
17699  if (ST->hasMVEFloatOps())
17700  if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17701  return NewLoad;
17702 
17703  return SDValue();
17704 }
17705 
17706 // Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17707 // constant bounds.
17709  const ARMSubtarget *Subtarget) {
17710  if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17711  !Subtarget->isThumb2())
17712  return SDValue();
17713 
17714  EVT VT = Op.getValueType();
17715  SDValue Op0 = Op.getOperand(0);
17716 
17717  if (VT != MVT::i32 ||
17718  (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17719  !isa<ConstantSDNode>(Op.getOperand(1)) ||
17720  !isa<ConstantSDNode>(Op0.getOperand(1)))
17721  return SDValue();
17722 
17723  SDValue Min = Op;
17724  SDValue Max = Op0;
17725  SDValue Input = Op0.getOperand(0);
17726  if (Min.getOpcode() == ISD::SMAX)
17727  std::swap(Min, Max);
17728 
17729  APInt MinC = Min.getConstantOperandAPInt(1);
17730  APInt MaxC = Max.getConstantOperandAPInt(1);
17731 
17732  if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17733  !(MinC + 1).isPowerOf2())
17734  return SDValue();
17735 
17736  SDLoc DL(Op);
17737  if (MinC == ~MaxC)
17738  return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17739  DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
17740  if (MaxC == 0)
17741  return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17742  DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
17743 
17744  return SDValue();
17745 }
17746 
17747 /// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17748 /// saturates.
/// Recognises smin/smax (or umin) clamp patterns on v4i32/v8i16 and lowers
/// them to MVE VQMOVN nodes; i32 min/max pairs are forwarded to
/// PerformMinMaxToSatCombine instead.
/// NOTE(review): the first signature line (original line 17749) was dropped
/// by the listing extraction -- confirm against the upstream source.
17750  const ARMSubtarget *ST) {
17751  EVT VT = N->getValueType(0);
17752  SDValue N0 = N->getOperand(0);
17753 
17754  if (VT == MVT::i32)
17755  return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17756 
17757  if (!ST->hasMVEIntegerOps())
17758  return SDValue();
17759 
17760  if (SDValue V = PerformVQDMULHCombine(N, DAG))
17761  return V;
17762 
17763  if (VT != MVT::v4i32 && VT != MVT::v8i16)
17764  return SDValue();
17765 
 // Returns true if Min/Max form a signed clamp to the half-width signed
 // range (splat constants SaturateC / ~SaturateC in either node order).
17766  auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17767  // Check one is a smin and the other is a smax
17768  if (Min->getOpcode() != ISD::SMIN)
17769  std::swap(Min, Max);
17770  if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
17771  return false;
17772 
17773  APInt SaturateC;
17774  if (VT == MVT::v4i32)
17775  SaturateC = APInt(32, (1 << 15) - 1, true);
17776  else //if (VT == MVT::v8i16)
17777  SaturateC = APInt(16, (1 << 7) - 1, true);
17778 
 // Both bounds must be constant splats of exactly those values.
17779  APInt MinC, MaxC;
17780  if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17781  MinC != SaturateC)
17782  return false;
17783  if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
17784  MaxC != ~SaturateC)
17785  return false;
17786  return true;
17787  };
17788 
17789  if (IsSignedSaturate(N, N0.getNode())) {
17790  SDLoc DL(N);
17791  MVT ExtVT, HalfVT;
17792  if (VT == MVT::v4i32) {
17793  HalfVT = MVT::v8i16;
17794  ExtVT = MVT::v4i16;
17795  } else { // if (VT == MVT::v8i16)
17796  HalfVT = MVT::v16i8;
17797  ExtVT = MVT::v8i8;
17798  }
17799 
17800  // Create a VQMOVNB with undef top lanes, then signed extended into the top
17801  // half. That extend will hopefully be removed if only the bottom bits are
17802  // demanded (though a truncating store, for example).
17803  SDValue VQMOVN =
17804  DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
17805  N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
17806  SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
17807  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
17808  DAG.getValueType(ExtVT));
17809  }
17810 
 // Returns true if Min clamps to the half-width unsigned maximum.
17811  auto IsUnsignedSaturate = [&](SDNode *Min) {
17812  // For unsigned, we just need to check for <= 0xffff
17813  if (Min->getOpcode() != ISD::UMIN)
17814  return false;
17815 
17816  APInt SaturateC;
17817  if (VT == MVT::v4i32)
17818  SaturateC = APInt(32, (1 << 16) - 1, true);
17819  else //if (VT == MVT::v8i16)
17820  SaturateC = APInt(16, (1 << 8) - 1, true);
17821 
17822  APInt MinC;
17823  if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17824  MinC != SaturateC)
17825  return false;
17826  return true;
17827  };
17828 
17829  if (IsUnsignedSaturate(N)) {
17830  SDLoc DL(N);
17831  MVT HalfVT;
17832  unsigned ExtConst;
17833  if (VT == MVT::v4i32) {
17834  HalfVT = MVT::v8i16;
17835  ExtConst = 0x0000FFFF;
17836  } else { //if (VT == MVT::v8i16)
17837  HalfVT = MVT::v16i8;
17838  ExtConst = 0x00FF;
17839  }
17840 
17841  // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
17842  // an AND. That extend will hopefully be removed if only the bottom bits are
17843  // demanded (though a truncating store, for example).
17844  SDValue VQMOVN =
17845  DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
17846  DAG.getConstant(0, DL, MVT::i32));
17847  SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
17848  return DAG.getNode(ISD::AND, DL, VT, Bitcast,
17849  DAG.getConstant(ExtConst, DL, VT));
17850  }
17851 
17852  return SDValue();
17853 }
17854 
17856  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
17857  if (!C)
17858  return nullptr;
17859  const APInt *CV = &C->getAPIntValue();
17860  return CV->isPowerOf2() ? CV : nullptr;
17861 }
17862 
// NOTE(review): the signature line (original line 17863, the member function
// taking the CMOV node and the SelectionDAG) was dropped by the listing
// extraction -- confirm against the upstream source.
17864  // If we have a CMOV, OR and AND combination such as:
17865  // if (x & CN)
17866  // y |= CM;
17867  //
17868  // And:
17869  // * CN is a single bit;
17870  // * All bits covered by CM are known zero in y
17871  //
17872  // Then we can convert this into a sequence of BFI instructions. This will
17873  // always be a win if CM is a single bit, will always be no worse than the
17874  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
17875  // three bits (due to the extra IT instruction).
17876 
17877  SDValue Op0 = CMOV->getOperand(0);
17878  SDValue Op1 = CMOV->getOperand(1);
17879  auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
17880  auto CC = CCNode->getAPIntValue().getLimitedValue();
17881  SDValue CmpZ = CMOV->getOperand(4);
17882 
17883  // The compare must be against zero.
17884  if (!isNullConstant(CmpZ->getOperand(1)))
17885  return SDValue();
17886 
17887  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
17888  SDValue And = CmpZ->getOperand(0);
17889  if (And->getOpcode() != ISD::AND)
17890  return SDValue();
 // The AND mask must be a single set bit (the tested bit CN).
17891  const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
17892  if (!AndC)
17893  return SDValue();
17894  SDValue X = And->getOperand(0);
17895 
17896  if (CC == ARMCC::EQ) {
17897  // We're performing an "equal to zero" compare. Swap the operands so we
17898  // canonicalize on a "not equal to zero" compare.
17899  std::swap(Op0, Op1);
17900  } else {
17901  assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
17902  }
17903 
 // After canonicalization Op1 must be the (or y, CM) arm and Op0 must be y.
17904  if (Op1->getOpcode() != ISD::OR)
17905  return SDValue();
17906 
17907  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
17908  if (!OrC)
17909  return SDValue();
17910  SDValue Y = Op1->getOperand(0);
17911 
17912  if (Op0 != Y)
17913  return SDValue();
17914 
17915  // Now, is it profitable to continue?
17916  APInt OrCI = OrC->getAPIntValue();
17917  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
17918  if (OrCI.countPopulation() > Heuristic)
17919  return SDValue();
17920 
17921  // Lastly, can we determine that the bits defined by OrCI
17922  // are zero in Y?
17923  KnownBits Known = DAG.computeKnownBits(Y);
17924  if ((OrCI & Known.Zero) != OrCI)
17925  return SDValue();
17926 
17927  // OK, we can do the combine.
17928  SDValue V = Y;
17929  SDLoc dl(X);
17930  EVT VT = X.getValueType();
17931  unsigned BitInX = AndC->logBase2();
17932 
17933  if (BitInX != 0) {
17934  // We must shift X first.
17935  X = DAG.getNode(ISD::SRL, dl, VT, X,
17936  DAG.getConstant(BitInX, dl, VT));
17937  }
17938 
 // Insert X's tested bit into each set bit position of the OR constant,
 // one BFI per bit.
17939  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
17940  BitInY < NumActiveBits; ++BitInY) {
17941  if (OrCI[BitInY] == 0)
17942  continue;
17943  APInt Mask(VT.getSizeInBits(), 0);
17944  Mask.setBit(BitInY);
17945  V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
17946  // Confusingly, the operand is an *inverted* mask.
17947  DAG.getConstant(~Mask, dl, VT));
17948  }
17949 
17950  return V;
17951 }
17952 
17953 // Given N, the value controlling the conditional branch, search for the loop
17954 // intrinsic, returning it, along with how the value is used. We need to handle
17955 // patterns such as the following:
17956 // (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
17957 // (brcond (setcc (loop.decrement), 0, eq), exit)
17958 // (brcond (setcc (loop.decrement), 0, ne), header)
17960  bool &Negate) {
17961  switch (N->getOpcode()) {
17962  default:
17963  break;
17964  case ISD::XOR: {
17965  if (!isa<ConstantSDNode>(N.getOperand(1)))
17966  return SDValue();
17967  if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
17968  return SDValue();
17969  Negate = !Negate;
17970  return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
17971  }
17972  case ISD::SETCC: {
17973  auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
17974  if (!Const)
17975  return SDValue();
17976  if (Const->isZero())
17977  Imm = 0;
17978  else if (Const->isOne())
17979  Imm = 1;
17980  else
17981  return SDValue();
17982  CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
17983  return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
17984  }
17985  case ISD::INTRINSIC_W_CHAIN: {
17986  unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
17987  if (IntOp != Intrinsic::test_start_loop_iterations &&
17988  IntOp != Intrinsic::loop_decrement_reg)
17989  return SDValue();
17990  return N;
17991  }
17992  }
17993  return SDValue();
17994 }
17995 
// NOTE(review): the first signature lines (original lines 17996-17997, the
// function name plus the branch node and DCI parameters) were dropped by the
// listing extraction -- confirm against the upstream source.
17998  const ARMSubtarget *ST) {
17999 
18000  // The hwloop intrinsics that we're interested are used for control-flow,
18001  // either for entering or exiting the loop:
18002  // - test.start.loop.iterations will test whether its operand is zero. If it
18003  // is zero, the proceeding branch should not enter the loop.
18004  // - loop.decrement.reg also tests whether its operand is zero. If it is
18005  // zero, the proceeding branch should not branch back to the beginning of
18006  // the loop.
18007  // So here, we need to check that how the brcond is using the result of each
18008  // of the intrinsics to ensure that we're branching to the right place at the
18009  // right time.
18010 
18011  ISD::CondCode CC;
18012  SDValue Cond;
18013  int Imm = 1;
18014  bool Negate = false;
18015  SDValue Chain = N->getOperand(0);
18016  SDValue Dest;
18017 
 // Extract condition, destination and (for BR_CC) the compared-against
 // 0/1 immediate from either branch form.
18018  if (N->getOpcode() == ISD::BRCOND) {
18019  CC = ISD::SETEQ;
18020  Cond = N->getOperand(1);
18021  Dest = N->getOperand(2);
18022  } else {
18023  assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18024  CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18025  Cond = N->getOperand(2);
18026  Dest = N->getOperand(4);
18027  if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18028  if (!Const->isOne() && !Const->isZero())
18029  return SDValue();
18030  Imm = Const->getZExtValue();
18031  } else
18032  return SDValue();
18033  }
18034 
18035  SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18036  if (!Int)
18037  return SDValue();
18038 
18039  if (Negate)
18040  CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18041 
 // Classify which way the branch goes when the counter value is zero.
18042  auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18043  return (CC == ISD::SETEQ && Imm == 0) ||
18044  (CC == ISD::SETNE && Imm == 1) ||
18045  (CC == ISD::SETLT && Imm == 1) ||
18046  (CC == ISD::SETULT && Imm == 1);
18047  };
18048 
18049  auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18050  return (CC == ISD::SETEQ && Imm == 1) ||
18051  (CC == ISD::SETNE && Imm == 0) ||
18052  (CC == ISD::SETGT && Imm == 0) ||
18053  (CC == ISD::SETUGT && Imm == 0) ||
18054  (CC == ISD::SETGE && Imm == 1) ||
18055  (CC == ISD::SETUGE && Imm == 1);
18056  };
18057 
18058  assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18059  "unsupported condition");
18060 
18061  SDLoc dl(Int);
18062  SelectionDAG &DAG = DCI.DAG;
18063  SDValue Elements = Int.getOperand(2);
18064  unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
18065  assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18066  && "expected single br user");
18067  SDNode *Br = *N->use_begin();
18068  SDValue OtherTarget = Br->getOperand(1);
18069 
18070  // Update the unconditional branch to branch to the given Dest.
18071  auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18072  SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18073  SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18074  DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18075  };
18076 
18077  if (IntOp == Intrinsic::test_start_loop_iterations) {
18078  SDValue Res;
18079  SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18080  // We expect this 'instruction' to branch when the counter is zero.
18081  if (IsTrueIfZero(CC, Imm)) {
18082  SDValue Ops[] = {Chain, Setup, Dest};
18083  Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18084  } else {
18085  // The logic is the reverse of what we need for WLS, so find the other
18086  // basic block target: the target of the proceeding br.
18087  UpdateUncondBr(Br, Dest, DAG);
18088 
18089  SDValue Ops[] = {Chain, Setup, OtherTarget};
18090  Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18091  }
18092  // Update LR count to the new value
18093  DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18094  // Update chain
18095  DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18096  return Res;
18097  } else {
18098  SDValue Size = DAG.getTargetConstant(
18099  cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
18100  SDValue Args[] = { Int.getOperand(0), Elements, Size, };
 // NOTE(review): original line 18102 (the VT list and operand arguments
 // completing this getNode call) was dropped by the listing extraction --
 // confirm against the upstream source.
18101  SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18103  DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18104 
18105  // We expect this instruction to branch when the count is not zero.
18106  SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18107 
18108  // Update the unconditional branch to target the loop preheader if we've
18109  // found the condition has been reversed.
18110  if (Target == OtherTarget)
18111  UpdateUncondBr(Br, Dest, DAG);
18112 
18113  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18114  SDValue(LoopDec.getNode(), 1), Chain);
18115 
18116  SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18117  return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18118  }
18119  return SDValue();
18120 }
18121 
18122 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18123 SDValue
// NOTE(review): the remainder of the signature (original line 18124, the
// qualified function name with the branch node and SelectionDAG parameters)
// was dropped by the listing extraction -- confirm against the upstream
// source.
18125  SDValue Cmp = N->getOperand(4);
18126  if (Cmp.getOpcode() != ARMISD::CMPZ)
18127  // Only looking at NE cases.
18128  return SDValue();
18129 
18130  EVT VT = N->getValueType(0);
18131  SDLoc dl(N);
18132  SDValue LHS = Cmp.getOperand(0);
18133  SDValue RHS = Cmp.getOperand(1);
18134  SDValue Chain = N->getOperand(0);
18135  SDValue BB = N->getOperand(1);
18136  SDValue ARMcc = N->getOperand(2);
 // NOTE(review): original line 18137 (the declaration of CC, initialized by
 // the expression below) was dropped by the extraction -- confirm upstream.
18138  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18139 
18140  // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18141  // -> (brcond Chain BB CC CPSR Cmp)
18142  if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18143  LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18144  LHS->getOperand(0)->hasOneUse()) {
18145  auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
18146  auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
18147  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
18148  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
18149  if ((LHS00C && LHS00C->getZExtValue() == 0) &&
18150  (LHS01C && LHS01C->getZExtValue() == 1) &&
18151  (LHS1C && LHS1C->getZExtValue() == 1) &&
18152  (RHSC && RHSC->getZExtValue() == 0)) {
 // Fold away the cmov+and+cmpz, branching directly on the cmov's own
 // condition, flag operand and compare.
18153  return DAG.getNode(
18154  ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18155  LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18156  }
18157  }
18158 
18159  return SDValue();
18160 }
18161 
18162 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18163 SDValue
// NOTE(review): this listing is missing several hyperlinked source lines
// (e.g. 18164 — the qualified function name/parameter list, 18177 — the
// declaration of CC, 18239/18242/18245 inside the CSINC fold). Consult the
// original ARMISelLowering.cpp before editing this function.
//
// Operand layout of ARMISD::CMOV: 0 = FalseVal, 1 = TrueVal, 2 = ARM
// condition code, 3 = CPSR, 4 = the compare producing the flags.
18165  SDValue Cmp = N->getOperand(4);
18166  if (Cmp.getOpcode() != ARMISD::CMPZ)
18167  // Only looking at EQ and NE cases.
18168  return SDValue();
18169 
18170  EVT VT = N->getValueType(0);
18171  SDLoc dl(N);
18172  SDValue LHS = Cmp.getOperand(0);
18173  SDValue RHS = Cmp.getOperand(1);
18174  SDValue FalseVal = N->getOperand(0);
18175  SDValue TrueVal = N->getOperand(1);
18176  SDValue ARMcc = N->getOperand(2);
18178  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18179 
18180  // BFI is only available on V6T2+.
18181  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18182  SDValue R = PerformCMOVToBFICombine(N, DAG);
18183  if (R)
18184  return R;
18185  }
18186 
18187  // Simplify
18188  // mov r1, r0
18189  // cmp r1, x
18190  // mov r0, y
18191  // moveq r0, x
18192  // to
18193  // cmp r0, x
18194  // movne r0, y
18195  //
18196  // mov r1, r0
18197  // cmp r1, x
18198  // mov r0, x
18199  // movne r0, y
18200  // to
18201  // cmp r0, x
18202  // movne r0, y
18203  /// FIXME: Turn this into a target neutral optimization?
// Res accumulates whichever rewritten CMOV the folds below produce; a null
// Res at the end means no combine fired.
18204  SDValue Res;
18205  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18206  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18207  N->getOperand(3), Cmp);
18208  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
// Dual of the case above: invert the condition to NE and swap to FalseVal.
18209  SDValue ARMcc;
18210  SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18211  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18212  N->getOperand(3), NewCmp);
18213  }
18214 
18215  // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18216  // -> (cmov F T CC CPSR Cmp)
18217  if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
18218  auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
18219  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
18220  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
18221  if ((LHS0C && LHS0C->getZExtValue() == 0) &&
18222  (LHS1C && LHS1C->getZExtValue() == 1) &&
18223  (RHSC && RHSC->getZExtValue() == 0)) {
18224  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18225  LHS->getOperand(2), LHS->getOperand(3),
18226  LHS->getOperand(4));
18227  }
18228  }
18229 
// Everything below only applies to integer-valued CMOVs.
18230  if (!VT.isInteger())
18231  return SDValue();
18232 
18233  // Fold away an unneccessary CMPZ/CMOV
18234  // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18235  // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18236  // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18237  if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18238  N->getConstantOperandVal(2) == ARMCC::NE) {
// NOTE(review): the declaration of Cond (line 18239) and the condition-code
// inversion / constant rebuild (lines 18242, 18245) are missing from this
// extraction.
18240  if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18241  if (N->getConstantOperandVal(2) == ARMCC::NE)
18243  return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18244  N->getOperand(1),
18246  N->getOperand(3), C);
18247  }
18248  }
18249 
18250  // Materialize a boolean comparison for integers so we can avoid branching.
18251  if (isNullConstant(FalseVal)) {
18252  if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18253  if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18254  // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18255  // right 5 bits will make that 32 be 1, otherwise it will be 0.
18256  // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18257  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18258  Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18259  DAG.getConstant(5, dl, MVT::i32));
18260  } else {
18261  // CMOV 0, 1, ==, (CMPZ x, y) ->
18262  // (ADDCARRY (SUB x, y), t:0, t:1)
18263  // where t = (SUBCARRY 0, (SUB x, y), 0)
18264  //
18265  // The SUBCARRY computes 0 - (x - y) and this will give a borrow when
18266  // x != y. In other words, a carry C == 1 when x == y, C == 0
18267  // otherwise.
18268  // The final ADDCARRY computes
18269  // x - y + (0 - (x - y)) + C == C
18270  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18271  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18272  SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18273  // ISD::SUBCARRY returns a borrow but we want the carry here
18274  // actually.
18275  SDValue Carry =
18276  DAG.getNode(ISD::SUB, dl, MVT::i32,
18277  DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18278  Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
18279  }
18280  } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18281  (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18282  // This seems pointless but will allow us to combine it further below.
18283  // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18284  SDValue Sub =
18285  DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18286  SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18287  Sub.getValue(1), SDValue());
18288  Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18289  N->getOperand(3), CPSRGlue.getValue(1));
18290  FalseVal = Sub;
18291  }
18292  } else if (isNullConstant(TrueVal)) {
18293  if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18294  (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18295  // This seems pointless but will allow us to combine it further below
18296  // Note that we change == for != as this is the dual for the case above.
18297  // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18298  SDValue Sub =
18299  DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18300  SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18301  Sub.getValue(1), SDValue());
18302  Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18303  DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18304  N->getOperand(3), CPSRGlue.getValue(1));
18305  FalseVal = Sub;
18306  }
18307  }
18308 
18309  // On Thumb1, the DAG above may be further combined if z is a power of 2
18310  // (z == 2 ^ K).
18311  // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
18312  // t1 = (USUBO (SUB x, y), 1)
18313  // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
18314  // Result = if K != 0 then (SHL t2:0, K) else t2:0
18315  //
18316  // This also handles the special case of comparing against zero; it's
18317  // essentially, the same pattern, except there's no SUBS:
18318  // CMOV x, z, !=, (CMPZ x, 0) ->
18319  // t1 = (USUBO x, 1)
18320  // t2 = (SUBCARRY x, t1:0, t1:1)
18321  // Result = if K != 0 then (SHL t2:0, K) else t2:0
18322  const APInt *TrueConst;
18323  if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18324  ((FalseVal.getOpcode() == ARMISD::SUBS &&
18325  FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
18326  (FalseVal == LHS && isNullConstant(RHS))) &&
18327  (TrueConst = isPowerOf2Constant(TrueVal))) {
18328  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18329  unsigned ShiftAmount = TrueConst->logBase2();
18330  if (ShiftAmount)
18331  TrueVal = DAG.getConstant(1, dl, VT);
18332  SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18333  Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
18334 
18335  if (ShiftAmount)
18336  Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18337  DAG.getConstant(ShiftAmount, dl, MVT::i32));
18338  }
18339 
18340  if (Res.getNode()) {
18341  KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18342  // Capture demanded bits information that would be otherwise lost.
18343  if (Known.Zero == 0xfffffffe)
18344  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18345  DAG.getValueType(MVT::i1));
18346  else if (Known.Zero == 0xffffff00)
18347  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18348  DAG.getValueType(MVT::i8));
18349  else if (Known.Zero == 0xffff0000)
18350  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18351  DAG.getValueType(MVT::i16));
18352  }
18353 
18354  return Res;
18355 }
18356 
// Target-specific DAG combine for ISD::BITCAST.
// NOTE(review): the first signature line(s) (18357-18358, carrying the
// function name and the SDNode*/DAGCombinerInfo parameters) are missing from
// this extraction -- confirm against the original source.
18359  const ARMSubtarget *ST) {
18360  SelectionDAG &DAG = DCI.DAG;
18361  SDValue Src = N->getOperand(0);
18362  EVT DstVT = N->getValueType(0);
18363 
18364  // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18365  if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18366  EVT SrcVT = Src.getValueType();
18367  if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18368  return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18369  }
18370 
18371  // We may have a bitcast of something that has already had this bitcast
18372  // combine performed on it, so skip past any VECTOR_REG_CASTs.
18373  while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18374  Src = Src.getOperand(0);
18375 
18376  // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18377  // would be generated is at least the width of the element type.
18378  EVT SrcVT = Src.getValueType();
18379  if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18380  Src.getOpcode() == ARMISD::VMVNIMM ||
18381  Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18382  SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18383  DAG.getDataLayout().isBigEndian())
18384  return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18385 
18386  // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18387  if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18388  return R;
18389 
// No profitable rewrite found; leave the bitcast alone.
18390  return SDValue();
18391 }
18392 
18393 // Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18394 // node into stack operations after legalizeOps.
// NOTE(review): several hyperlinked source lines are missing from this
// extraction: 18395 (the ARMTargetLowering::PerformMVETruncCombine signature
// line), 18453-18454 (the EXTRACT_VECTOR_ELT node construction), 18483 and
// 18492 (MachinePointerInfo construction). Consult the original file before
// editing.
18396  SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18397  SelectionDAG &DAG = DCI.DAG;
18398  EVT VT = N->getValueType(0);
18399  SDLoc DL(N);
18400 
18401  // MVETrunc(Undef, Undef) -> Undef
18402  if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18403  return DAG.getUNDEF(VT);
18404 
18405  // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18406  if (N->getNumOperands() == 2 &&
18407  N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18408  N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18409  return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18410  N->getOperand(0).getOperand(1),
18411  N->getOperand(1).getOperand(0),
18412  N->getOperand(1).getOperand(1),
18413 
18414  // MVETrunc(shuffle, shuffle) -> VMOVN
18415  if (N->getNumOperands() == 2 &&
18416  N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18417  N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18418  auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18419  auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18420 
18421  if (S0->getOperand(0) == S1->getOperand(0) &&
18422  S0->getOperand(1) == S1->getOperand(1)) {
18423  // Construct complete shuffle mask
18424  SmallVector<int, 8> Mask(S0->getMask());
18425  Mask.append(S1->getMask().begin(), S1->getMask().end());
18426 
// A mask matching a bottom-lane (false) or top-lane (true) VMOVN truncate
// pattern lets us emit a single VMOVN of the shuffle's two sources.
18427  if (isVMOVNTruncMask(Mask, VT, false))
18428  return DAG.getNode(
18429  ARMISD::VMOVN, DL, VT,
18430  DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18431  DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18432  DAG.getConstant(1, DL, MVT::i32));
18433  if (isVMOVNTruncMask(Mask, VT, true))
18434  return DAG.getNode(
18435  ARMISD::VMOVN, DL, VT,
18436  DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18437  DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18438  DAG.getConstant(1, DL, MVT::i32));
18439  }
18440  }
18441 
18442  // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18443  // truncate to a buildvector to allow the generic optimisations to kick in.
18444  if (all_of(N->ops(), [](SDValue Op) {
18445  return Op.getOpcode() == ISD::BUILD_VECTOR ||
18446  Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18447  (Op.getOpcode() == ISD::BITCAST &&
18448  Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18449  })) {
18450  SmallVector<SDValue, 8> Extracts;
18451  for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18452  SDValue O = N->getOperand(Op);
18453  for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
// NOTE(review): the EXTRACT_VECTOR_ELT getNode call that defines Ext
// (lines 18453-18454 in the original) is partially missing here.
18455  DAG.getConstant(i, DL, MVT::i32));
18456  Extracts.push_back(Ext);
18457  }
18458  }
18459  return DAG.getBuildVector(VT, DL, Extracts);
18460  }
18461 
18462  // If we are late in the legalization process and nothing has optimised
18463  // the trunc to anything better, lower it to a stack store and reload,
18464  // performing the truncation whilst keeping the lanes in the correct order:
18465  // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18466  if (!DCI.isAfterLegalizeDAG())
18467  return SDValue();
18468 
18469  SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(4));
18470  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18471  int NumIns = N->getNumOperands();
18472  assert((NumIns == 2 || NumIns == 4) &&
18473  "Expected 2 or 4 inputs to an MVETrunc");
18474  EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18475  if (N->getNumOperands() == 4)
18476  StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18477 
// Truncating-store each input into its slice of the 16-byte slot, then
// reload the whole slot as the narrow vector type.
18478  SmallVector<SDValue> Chains;
18479  for (int I = 0; I < NumIns; I++) {
18480  SDValue Ptr = DAG.getNode(
18481  ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18482  DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18484  DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18485  SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18486  Ptr, MPI, StoreVT, Align(4));
18487  Chains.push_back(Ch);
18488  }
18489 
18490  SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18491  MachinePointerInfo MPI =
18493  return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18494 }
18495 
18496 // Take a MVEEXT(load x) and split that into (extload x, extload x+8)
// NOTE(review): the function's signature line (18497) and the declaration of
// the Loads vector (18541) are missing from this extraction -- confirm
// against the original source before editing.
18498  SelectionDAG &DAG) {
18499  SDValue N0 = N->getOperand(0);
18500  LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
// Only split simple, single-use, non-indexed loads.
18501  if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18502  return SDValue();
18503 
18504  EVT FromVT = LD->getMemoryVT();
18505  EVT ToVT = N->getValueType(0);
18506  if (!ToVT.isVector())
18507  return SDValue();
18508  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18509  EVT ToEltVT = ToVT.getVectorElementType();
18510  EVT FromEltVT = FromVT.getVectorElementType();
18511 
// Pick the per-load element count for the supported widening combinations.
18512  unsigned NumElements = 0;
18513  if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18514  NumElements = 4;
18515  if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18516  NumElements = 8;
18517  assert(NumElements != 0);
18518 
18519  ISD::LoadExtType NewExtType =
18520  N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
// Don't change the sign/zero-extension kind of an existing extload.
18521  if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18522  LD->getExtensionType() != ISD::EXTLOAD &&
18523  LD->getExtensionType() != NewExtType)
18524  return SDValue();
18525 
18526  LLVMContext &C = *DAG.getContext();
18527  SDLoc DL(LD);
18528  // Details about the old load
18529  SDValue Ch = LD->getChain();
18530  SDValue BasePtr = LD->getBasePtr();
18531  Align Alignment = LD->getOriginalAlign();
18532  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18533  AAMDNodes AAInfo = LD->getAAInfo();
18534 
18535  SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18536  EVT NewFromVT = EVT::getVectorVT(
18537  C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18538  EVT NewToVT = EVT::getVectorVT(
18539  C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18540 
18542  SmallVector<SDValue, 4> Chains;
// Emit one widening extload per NumElements-sized slice of the original load.
18543  for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18544  unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18545  SDValue NewPtr =
18546  DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
18547 
18548  SDValue NewLoad =
18549  DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18550  LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18551  Alignment, MMOFlags, AAInfo);
18552  Loads.push_back(NewLoad);
18553  Chains.push_back(SDValue(NewLoad.getNode(), 1));
18554  }
18555 
// Rewire consumers of the old load's chain onto the new loads' chains.
18556  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18557  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18558  return DAG.getMergeValues(Loads, DL);
18559 }
18560 
18561 // Perform combines for MVEEXT. If it has not be optimized to anything better
18562 // before lowering, it gets converted to stack store and extloads performing the
18563 // extend whilst still keeping the same lane ordering.
// NOTE(review): hyperlinked source lines are missing from this extraction:
// 18564 (the ARMTargetLowering::PerformMVEExtCombine signature line), 18629
// (the call to PerformSplittingMVEEXTToWideningLoad that defines L), 18648
// (MachinePointerInfo construction) and 18657 (per-iteration MPI). Consult
// the original file before editing.
18565  SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18566  SelectionDAG &DAG = DCI.DAG;
18567  EVT VT = N->getValueType(0);
18568  SDLoc DL(N);
18569  assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18570  assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18571 
18572  EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18573  *DAG.getContext());
// Helper: reinterpret V as the result type and sign/zero-extend in-register
// depending on whether this node is an MVESEXT or an MVEZEXT.
18574  auto Extend = [&](SDValue V) {
18575  SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18576  return N->getOpcode() == ARMISD::MVESEXT
18577  ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18578  DAG.getValueType(ExtVT))
18579  : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18580  };
18581 
18582  // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18583  if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18584  SDValue Ext = Extend(N->getOperand(0));
18585  return DAG.getMergeValues({Ext, Ext}, DL);
18586  }
18587 
18588  // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18589  if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18590  ArrayRef<int> Mask = SVN->getMask();
18591  assert(Mask.size() == 2 * VT.getVectorNumElements());
18592  assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18593  unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18594  SDValue Op0 = SVN->getOperand(0);
18595  SDValue Op1 = SVN->getOperand(1);
18596 
// True if every defined mask element starting at Start picks lane
// Idx*2+Offset, i.e. the shuffle interleaves one source's even/odd lanes.
18597  auto CheckInregMask = [&](int Start, int Offset) {
18598  for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18599  if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18600  return false;
18601  return true;
18602  };
// V0/V1 default to this node's own results, meaning "not combined".
18603  SDValue V0 = SDValue(N, 0);
18604  SDValue V1 = SDValue(N, 1);
18605  if (CheckInregMask(0, 0))
18606  V0 = Extend(Op0);
18607  else if (CheckInregMask(0, 1))
18608  V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18609  else if (CheckInregMask(0, Mask.size()))
18610  V0 = Extend(Op1);
18611  else if (CheckInregMask(0, Mask.size() + 1))
18612  V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18613 
18614  if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18615  V1 = Extend(Op1);
18616  else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18617  V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18618  else if (CheckInregMask(VT.getVectorNumElements(), 0))
18619  V1 = Extend(Op0);
18620  else if (CheckInregMask(VT.getVectorNumElements(), 1))
18621  V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18622 
// Only rewrite if at least one of the two results was combined.
18623  if (V0.getNode() != N || V1.getNode() != N)
18624  return DAG.getMergeValues({V0, V1}, DL);
18625  }
18626 
18627  // MVEEXT(load) -> extload, extload
18628  if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18630  return L;
18631 
18632  if (!DCI.isAfterLegalizeDAG())
18633  return SDValue();
18634 
18635  // Lower to a stack store and reload:
18636  // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18637  SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(4));
18638  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18639  int NumOuts = N->getNumValues();
18640  assert((NumOuts == 2 || NumOuts == 4) &&
18641  "Expected 2 or 4 outputs to an MVEEXT");
18642  EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18643  *DAG.getContext());
18644  if (N->getNumOperands() == 4)
18645  LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18646 
18647  MachinePointerInfo MPI =
18649  SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18650  StackPtr, MPI, Align(4));
18651 
// Reload each output as a sign/zero extload of its slice of the slot.
18652  SmallVector<SDValue> Loads;
18653  for (int I = 0; I < NumOuts; I++) {
18654  SDValue Ptr = DAG.getNode(
18655  ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18656  DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18658  DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18659  SDValue Load = DAG.getExtLoad(
18660  N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18661  VT, Chain, Ptr, MPI, LoadVT, Align(4));
18662  Loads.push_back(Load);
18663  }
18664 
18665  return DAG.getMergeValues(Loads, DL);
18666 }
18667 
// Central dispatch for all ARM target-specific DAG combines: routes each
// node opcode to its dedicated Perform*Combine helper, or applies a
// SimplifyDemandedBits fold inline for the narrow multiply/saturating ops.
// NOTE(review): the first signature line (18668, carrying the
// ARMTargetLowering::PerformDAGCombine name and the SDNode *N parameter) and
// several hyperlinked case labels (18697-18698, 18700-18702, 18712, 18750,
// 18752, 18853) are missing from this extraction -- confirm against the
// original source before editing.
18669  DAGCombinerInfo &DCI) const {
18670  switch (N->getOpcode()) {
18671  default: break;
18672  case ISD::SELECT_CC:
18673  case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18674  case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18675  case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18676  case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
18677  case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18678  case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18679  case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18680  case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18681  case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18682  case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18683  case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18684  case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18685  case ISD::BRCOND:
18686  case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18687  case ARMISD::ADDC:
18688  case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18689  case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18690  case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18691  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18692  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18693  case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18694  case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18695  case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18696  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
// NOTE(review): the case labels for this handler (lines 18697-18698, likely
// ISD::INSERT_VECTOR_ELT / ISD::EXTRACT_VECTOR_ELT) are missing here.
18699  return PerformExtractEltCombine(N, DCI, Subtarget);
18703  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18704  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18705  case ISD::FP_TO_SINT:
18706  case ISD::FP_TO_UINT:
18707  return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18708  case ISD::FADD:
18709  return PerformFAddVSelectCombine(N, DCI.DAG, Subtarget);
18710  case ISD::FDIV:
18711  return PerformVDIVCombine(N, DCI.DAG, Subtarget);
// NOTE(review): the case label for this handler (line 18712, likely
// ISD::INTRINSIC_WO_CHAIN) is missing here.
18713  return PerformIntrinsicCombine(N, DCI);
18714  case ISD::SHL:
18715  case ISD::SRA:
18716  case ISD::SRL:
18717  return PerformShiftCombine(N, DCI, Subtarget);
18718  case ISD::SIGN_EXTEND:
18719  case ISD::ZERO_EXTEND:
18720  case ISD::ANY_EXTEND:
18721  return PerformExtendCombine(N, DCI.DAG, Subtarget);
18722  case ISD::FP_EXTEND:
18723  return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18724  case ISD::SMIN:
18725  case ISD::UMIN:
18726  case ISD::SMAX:
18727  case ISD::UMAX:
18728  return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18729  case ARMISD::CMOV:
18730  return PerformCMOVCombine(N, DCI.DAG);
18731  case ARMISD::BRCOND:
18732  return PerformBRCONDCombine(N, DCI.DAG);
18733  case ARMISD::CMPZ:
18734  return PerformCMPZCombine(N, DCI.DAG);
18735  case ARMISD::CSINC:
18736  case ARMISD::CSINV:
18737  case ARMISD::CSNEG:
18738  return PerformCSETCombine(N, DCI.DAG);
18739  case ISD::LOAD:
18740  return PerformLOADCombine(N, DCI, Subtarget);
18741  case ARMISD::VLD1DUP:
18742  case ARMISD::VLD2DUP:
18743  case ARMISD::VLD3DUP:
18744  case ARMISD::VLD4DUP:
18745  return PerformVLDCombine(N, DCI);
18746  case ARMISD::BUILD_VECTOR:
18747  return PerformARMBUILD_VECTORCombine(N, DCI);
18748  case ISD::BITCAST:
18749  return PerformBITCASTCombine(N, DCI, Subtarget);
// NOTE(review): the case labels for the next two handlers (lines 18750 and
// 18752, likely ARMISD::PREDICATE_CAST and ARMISD::VECTOR_REG_CAST) are
// missing here.
18751  return PerformPREDICATE_CASTCombine(N, DCI);
18753  return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18754  case ARMISD::MVETRUNC:
18755  return PerformMVETruncCombine(N, DCI);
18756  case ARMISD::MVESEXT:
18757  case ARMISD::MVEZEXT:
18758  return PerformMVEExtCombine(N, DCI);
18759  case ARMISD::VCMP:
18760  return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18761  case ISD::VECREDUCE_ADD:
18762  return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18763  case ARMISD::VADDVs:
18764  case ARMISD::VADDVu:
18765  case ARMISD::VADDLVs:
18766  case ARMISD::VADDLVu:
18767  case ARMISD::VADDLVAs:
18768  case ARMISD::VADDLVAu:
18769  case ARMISD::VMLAVs:
18770  case ARMISD::VMLAVu:
18771  case ARMISD::VMLALVs:
18772  case ARMISD::VMLALVu:
18773  case ARMISD::VMLALVAs:
18774  case ARMISD::VMLALVAu:
18775  return PerformReduceShuffleCombine(N, DCI.DAG);
18776  case ARMISD::VMOVN:
18777  return PerformVMOVNCombine(N, DCI);
18778  case ARMISD::VQMOVNs:
18779  case ARMISD::VQMOVNu:
18780  return PerformVQMOVNCombine(N, DCI);
18781  case ARMISD::VQDMULH:
18782  return PerformVQDMULHCombine(N, DCI);
18783  case ARMISD::ASRL:
18784  case ARMISD::LSRL:
18785  case ARMISD::LSLL:
18786  return PerformLongShiftCombine(N, DCI.DAG);
// The narrow-multiply / saturating ops below don't get their own helper;
// instead we shrink their operands' demanded bits in place.
18787  case ARMISD::SMULWB: {
18788  unsigned BitWidth = N->getValueType(0).getSizeInBits();
18789  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18790  if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18791  return SDValue();
18792  break;
18793  }
18794  case ARMISD::SMULWT: {
18795  unsigned BitWidth = N->getValueType(0).getSizeInBits();
18796  APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18797  if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18798  return SDValue();
18799  break;
18800  }
18801  case ARMISD::SMLALBB:
18802  case ARMISD::QADD16b:
18803  case ARMISD::QSUB16b:
18804  case ARMISD::UQADD16b:
18805  case ARMISD::UQSUB16b: {
18806  unsigned BitWidth = N->getValueType(0).getSizeInBits();
18807  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18808  if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18809  (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18810  return SDValue();
18811  break;
18812  }
18813  case ARMISD::SMLALBT: {
18814  unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
18815  APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
18816  unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
18817  APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
18818  if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
18819  (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
18820  return SDValue();
18821  break;
18822  }
18823  case ARMISD::SMLALTB: {
18824  unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
18825  APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
18826  unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
18827  APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
18828  if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
18829  (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
18830  return SDValue();
18831  break;
18832  }
18833  case ARMISD::SMLALTT: {
18834  unsigned BitWidth = N->getValueType(0).getSizeInBits();
18835  APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18836  if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18837  (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18838  return SDValue();
18839  break;
18840  }
18841  case ARMISD::QADD8b:
18842  case ARMISD::QSUB8b:
18843  case ARMISD::UQADD8b:
18844  case ARMISD::UQSUB8b: {
18845  unsigned BitWidth = N->getValueType(0).getSizeInBits();
18846  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
18847  if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18848  (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18849  return SDValue();
18850  break;
18851  }
18852  case ISD::INTRINSIC_VOID:
// NOTE(review): a case label (line 18853, likely ISD::INTRINSIC_W_CHAIN with
// an opening brace) is missing between these two lines.
18854  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
18855  case Intrinsic::arm_neon_vld1:
18856  case Intrinsic::arm_neon_vld1x2:
18857  case Intrinsic::arm_neon_vld1x3:
18858  case Intrinsic::arm_neon_vld1x4:
18859  case Intrinsic::arm_neon_vld2:
18860  case Intrinsic::arm_neon_vld3:
18861  case Intrinsic::arm_neon_vld4:
18862  case Intrinsic::arm_neon_vld2lane:
18863  case Intrinsic::arm_neon_vld3lane:
18864  case Intrinsic::arm_neon_vld4lane:
18865  case Intrinsic::arm_neon_vld2dup:
18866  case Intrinsic::arm_neon_vld3dup:
18867  case Intrinsic::arm_neon_vld4dup:
18868  case Intrinsic::arm_neon_vst1:
18869  case Intrinsic::arm_neon_vst1x2:
18870  case Intrinsic::arm_neon_vst1x3:
18871  case Intrinsic::arm_neon_vst1x4:
18872  case Intrinsic::arm_neon_vst2:
18873  case Intrinsic::arm_neon_vst3:
18874  case Intrinsic::arm_neon_vst4:
18875  case Intrinsic::arm_neon_vst2lane:
18876  case Intrinsic::arm_neon_vst3lane:
18877  case Intrinsic::arm_neon_vst4lane:
18878  return PerformVLDCombine(N, DCI);
18879  case Intrinsic::arm_mve_vld2q:
18880  case Intrinsic::arm_mve_vld4q:
18881  case Intrinsic::arm_mve_vst2q:
18882  case Intrinsic::arm_mve_vst4q:
18883  return PerformMVEVLDCombine(N, DCI);
18884  default: break;
18885  }
18886  break;
18887  }
18888  return SDValue();
18889 }
18890 
// Allow the DAG combiner to turn f32 loads/stores into equivalent integer
// (i32) operations, which avoids needless VFP register traffic.
// NOTE(review): the signature's first line (18891, carrying the
// ARMTargetLowering::isDesirableToTransformToIntegerOp name and the
// `unsigned Opc` parameter) is missing from this extraction.
18892  EVT VT) const {
18893  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
18894 }
18895 
18897  Align Alignment,
18899  unsigned *Fast) const {
18900  // Depends what it gets converted into if the type is weird.
18901  if (!VT.isSimple())
18902  return false;
18903 
18904  // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
18905  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
18906  auto Ty = VT.getSimpleVT().SimpleTy;
18907 
18908  if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
18909  // Unaligned access can use (for example) LRDB, LRDH, LDR
18910  if (AllowsUnaligned) {
18911  if (Fast)
18912  *Fast = Subtarget->hasV7Ops();
18913  return true;
18914  }
18915  }
18916 
18917  if (Ty == MVT::f64 || Ty == MVT::v2f64) {
18918  // For any little-endian targets with neon, we can support unaligned ld/st
18919  // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
18920  // A big-endian target may also explicitly support unaligned accesses
18921  if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
18922  if (Fast)
18923  *Fast = 1;
18924  return true;
18925  }
18926  }
18927 
18928  if (!Subtarget->hasMVEIntegerOps())
18929  return false;
18930 
18931  // These are for predicates
18932  if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
18933  Ty == MVT::v2i1)) {
18934  if (Fast)
18935  *Fast = 1;
18936  return true;
18937  }
18938 
18939  // These are for truncated stores/narrowing loads. They are fine so long as
18940  // the alignment is at least the size of the item being loaded
18941  if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
18942  Alignment >= VT.getScalarSizeInBits() / 8) {
18943  if (Fast)
18944  *Fast = true;
18945  return true;
18946  }
18947 
18948  // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
18949  // VSTRW.U32 all store the vector register in exactly the same format, and
18950  // differ only in the range of their immediate offset field and the required
18951  // alignment. So there is always a store that can be used, regardless of
18952  // actual type.
18953  //
18954  // For big endian, that is not the case. But can still emit a (VSTRB.U8;
18955  // VREV64.8) pair and get the same effect. This will likely be better than
18956  // aligning the vector through the stack.
18957  if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
18958  Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
18959  Ty == MVT::v2f64) {
18960  if (Fast)
18961  *Fast = 1;
18962  return true;
18963  }
18964 
18965  return false;
18966 }
18967 
18968 
// Pick a wide vector/FP type for inlined memcpy/memset when NEON is usable,
// falling back to MVT::Other to let the generic lowering decide.
// NOTE(review): hyperlinked lines are missing from this extraction: 18969
// (the qualified function name and return type) and 18977/18983 (the
// allowsMisalignedMemoryAccesses calls inside the two conditions below).
18970  const MemOp &Op, const AttributeList &FuncAttributes) const {
18971  // See if we can use NEON instructions for this...
18972  if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
18973  !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
18974  unsigned Fast;
// Prefer a 16-byte (Q-register) op when aligned or unaligned-but-fast.
18975  if (Op.size() >= 16 &&
18976  (Op.isAligned(Align(16)) ||
18978  MachineMemOperand::MONone, &Fast) &&
18979  Fast))) {
18980  return MVT::v2f64;
// Otherwise an 8-byte (D-register) op under the same conditions.
18981  } else if (Op.size() >= 8 &&
18982  (Op.isAligned(Align(8)) ||
18984  MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
18985  Fast))) {
18986  return MVT::f64;
18987  }
18988  }
18989 
18990  // Let the target-independent logic figure it out.
18991  return MVT::Other;
18992 }
18993 
18994 // 64-bit integers are split into their high and low parts and held in two
18995 // different registers, so the trunc is free since the low register can just
18996 // be used.
18997 bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
18998  if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
18999  return false;
19000  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19001  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19002  return (SrcBits == 64 && DestBits == 32);
19003 }
19004 
19005 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19006  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19007  !DstVT.isInteger())
19008  return false;
19009  unsigned SrcBits = SrcVT.getSizeInBits();
19010  unsigned DestBits = DstVT.getSizeInBits();
19011  return (SrcBits == 64 && DestBits == 32);
19012 }
19013 
19015  if (Val.getOpcode() != ISD::LOAD)
19016  return false;
19017 
19018  EVT VT1 = Val.getValueType();
19019  if (!VT1.isSimple() || !VT1.isInteger() ||
19020  !VT2.isSimple() || !VT2.isInteger())
19021  return false;
19022 
19023  switch (VT1.getSimpleVT().SimpleTy) {
19024  default: break;
19025  case MVT::i1:
19026  case MVT::i8:
19027  case MVT::i16:
19028  // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19029  return true;
19030  }
19031 
19032  return false;
19033 }
19034 
19036  if (!VT.isSimple())
19037  return false;
19038 
19039  // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19040  // negate values directly (fneg is free). So, we don't want to let the DAG
19041  // combiner rewrite fneg into xors and some other instructions. For f16 and
19042  // FullFP16 argument passing, some bitcast nodes may be introduced,
19043  // triggering this DAG combine rewrite, so we are avoiding that with this.
19044  switch (VT.getSimpleVT().SimpleTy) {
19045  default: break;
19046  case MVT::f16:
19047  return Subtarget->hasFullFP16();
19048  }
19049 
19050  return false;
19051 }
19052 
19053 /// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
19054 /// of the vector elements.
19055 static bool areExtractExts(Value *Ext1, Value *Ext2) {
19056  auto areExtDoubled = [](Instruction *Ext) {
19057  return Ext->getType()->getScalarSizeInBits() ==
19058  2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19059  };
19060 
19061  if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
19062  !match(Ext2, m_ZExtOrSExt(m_Value())) ||
19063  !areExtDoubled(cast<Instruction>(Ext1)) ||
19064  !areExtDoubled(cast<Instruction>(Ext2)))
19065  return false;
19066 
19067  return true;
19068 }
19069 
19070 /// Check if sinking \p I's operands to I's basic block is profitable, because
19071 /// the operands can be folded into a target instruction, e.g.
19072 /// sext/zext can be folded into vsubl.
19074  SmallVectorImpl<Use *> &Ops) const {
19075  if (!I->getType()->isVectorTy())
19076  return false;
19077 
19078  if (Subtarget->hasNEON()) {
19079  switch (I->getOpcode()) {
19080  case Instruction::Sub:
19081  case Instruction::Add: {
19082  if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19083  return false;
19084  Ops.push_back(&I->getOperandUse(0));
19085  Ops.push_back(&I->getOperandUse(1));
19086  return true;
19087  }
19088  default:
19089  return false;
19090  }
19091  }
19092 
19093  if (!Subtarget->hasMVEIntegerOps())
19094  return false;
19095 
19096  auto IsFMSMul = [&](Instruction *I) {
19097  if (!I->hasOneUse())
19098  return false;
19099  auto *Sub = cast<Instruction>(*I->users().begin());
19100  return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19101  };
19102  auto IsFMS = [&](Instruction *I) {
19103  if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19104  match(I->getOperand(1), m_FNeg(m_Value())))
19105  return true;
19106  return false;
19107  };
19108 
19109  auto IsSinker = [&](Instruction *I, int Operand) {
19110  switch (I->getOpcode()) {
19111  case Instruction::Add:
19112  case Instruction::Mul:
19113  case Instruction::FAdd:
19114  case Instruction::ICmp:
19115  case Instruction::FCmp:
19116  return true;
19117  case Instruction::FMul:
19118  return !IsFMSMul(I);
19119  case Instruction::Sub:
19120  case Instruction::FSub:
19121  case Instruction::Shl:
19122  case Instruction::LShr:
19123  case Instruction::AShr:
19124  return Operand == 1;
19125  case Instruction::Call:
19126  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19127  switch (II->getIntrinsicID()) {
19128  case Intrinsic::fma:
19129  return !IsFMS(I);
19130  case Intrinsic::sadd_sat:
19131  case Intrinsic::uadd_sat:
19132  case Intrinsic::arm_mve_add_predicated:
19133  case Intrinsic::arm_mve_mul_predicated:
19134  case Intrinsic::arm_mve_qadd_predicated:
19135  case Intrinsic::arm_mve_vhadd:
19136  case Intrinsic::arm_mve_hadd_predicated:
19137  case Intrinsic::arm_mve_vqdmull:
19138  case Intrinsic::arm_mve_vqdmull_predicated:
19139  case Intrinsic::arm_mve_vqdmulh:
19140  case Intrinsic::arm_mve_qdmulh_predicated:
19141  case Intrinsic::arm_mve_vqrdmulh:
19142  case Intrinsic::arm_mve_qrdmulh_predicated:
19143  case Intrinsic::arm_mve_fma_predicated:
19144  return true;
19145  case Intrinsic::ssub_sat:
19146  case Intrinsic::usub_sat:
19147  case Intrinsic::arm_mve_sub_predicated:
19148  case Intrinsic::arm_mve_qsub_predicated:
19149  case Intrinsic::arm_mve_hsub_predicated:
19150  case Intrinsic::arm_mve_vhsub:
19151  return Operand == 1;
19152  default:
19153  return false;
19154  }
19155  }
19156  return false;
19157  default:
19158  return false;
19159  }
19160  };
19161 
19162  for (auto OpIdx : enumerate(I->operands())) {
19163  Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19164  // Make sure we are not already sinking this operand
19165  if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19166  continue;
19167 
19168  Instruction *Shuffle = Op;
19169  if (Shuffle->getOpcode() == Instruction::BitCast)
19170  Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19171  // We are looking for a splat that can be sunk.
19172  if (!Shuffle ||
19175  m_Undef(), m_ZeroMask())))
19176  continue;
19177  if (!IsSinker(I, OpIdx.index()))
19178  continue;
19179 
19180  // All uses of the shuffle should be sunk to avoid duplicating it across gpr
19181  // and vector registers
19182  for (Use &U : Op->uses()) {
19183  Instruction *Insn = cast<Instruction>(U.getUser());
19184  if (!IsSinker(Insn, U.getOperandNo()))
19185  return false;
19186  }
19187 
19188  Ops.push_back(&Shuffle->getOperandUse(0));
19189  if (Shuffle != Op)
19190  Ops.push_back(&Op->getOperandUse(0));
19191  Ops.push_back(&OpIdx.value());
19192  }
19193  return true;
19194 }
19195 
19197  if (!Subtarget->hasMVEIntegerOps())
19198  return nullptr;
19199  Type *SVIType = SVI->getType();
19200  Type *ScalarType = SVIType->getScalarType();
19201 
19202  if (ScalarType->isFloatTy())
19203  return Type::getInt32Ty(SVIType->getContext());
19204  if (ScalarType->isHalfTy())
19205  return Type::getInt16Ty(SVIType->getContext());
19206  return nullptr;
19207 }
19208 
19210  EVT VT = ExtVal.getValueType();
19211 
19212  if (!isTypeLegal(VT))
19213  return false;
19214 
19215  if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19216  if (Ld->isExpandingLoad())
19217  return false;
19218  }
19219 
19220  if (Subtarget->hasMVEIntegerOps())
19221  return true;
19222 
19223  // Don't create a loadext if we can fold the extension into a wide/long
19224  // instruction.
19225  // If there's more than one user instruction, the loadext is desirable no
19226  // matter what. There can be two uses by the same instruction.
19227  if (ExtVal->use_empty() ||
19228  !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19229  return true;
19230 
19231  SDNode *U = *ExtVal->use_begin();
19232  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19233  U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19234  return false;
19235 
19236  return true;
19237 }
19238 
19240  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19241  return false;
19242 
19243  if (!isTypeLegal(EVT::getEVT(Ty1)))
19244  return false;
19245 
19246  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19247 
19248  // Assuming the caller doesn't have a zeroext or signext return parameter,
19249  // truncation all the way down to i1 is valid.
19250  return true;
19251 }
19252 
19253 /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19254 /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19255 /// expanded to FMAs when this method returns true, otherwise fmuladd is
19256 /// expanded to fmul + fadd.
19257 ///
19258 /// ARM supports both fused and unfused multiply-add operations; we already
19259 /// lower a pair of fmul and fadd to the latter so it's not clear that there
19260 /// would be a gain or that the gain would be worthwhile enough to risk
19261 /// correctness bugs.
19262 ///
19263 /// For MVE, we set this to true as it helps simplify the need for some
19264 /// patterns (and we don't have the non-fused floating point instruction).
19265 bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19266  EVT VT) const {
19267  if (!VT.isSimple())
19268  return false;
19269 
19270  switch (VT.getSimpleVT().SimpleTy) {
19271  case MVT::v4f32:
19272  case MVT::v8f16:
19273  return Subtarget->hasMVEFloatOps();
19274  case MVT::f16:
19275  return Subtarget->useFPVFMx16();
19276  case MVT::f32:
19277  return Subtarget->useFPVFMx();
19278  case MVT::f64:
19279  return Subtarget->useFPVFMx64();
19280  default:
19281  break;
19282  }
19283 
19284  return false;
19285 }
19286 
19287 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19288  if (V < 0)
19289  return false;
19290 
19291  unsigned Scale = 1;
19292  switch (VT.getSimpleVT().SimpleTy) {
19293  case MVT::i1:
19294  case MVT::i8:
19295  // Scale == 1;
19296  break;
19297  case MVT::i16:
19298  // Scale == 2;
19299  Scale = 2;
19300  break;
19301  default:
19302  // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19303  // Scale == 4;
19304  Scale = 4;
19305  break;
19306  }
19307 
19308  if ((V & (Scale - 1)) != 0)
19309  return false;
19310  return isUInt<5>(V / Scale);
19311 }
19312 
// Return true if V is a legal Thumb2 load/store immediate offset for type VT.
// The legal range depends on the instruction that would be used (MVE vector,
// half/word VLDR, LDRD, or plain integer LDR), so the checks below are
// ordered from most to least specific.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
                                      const ARMSubtarget *Subtarget) {
  // Only integer and FP accesses have immediate-offset forms.
  if (!VT.isInteger() && !VT.isFloatingPoint())
    return false;
  // NEON vector loads/stores take no immediate offset.
  if (VT.isVector() && Subtarget->hasNEON())
    return false;
  // FP vectors need the MVE float extension for their VLDR/VSTR forms.
  if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
      !Subtarget->hasMVEFloatOps())
    return false;

  // Work on the magnitude; the sign only matters for the imm12/imm8 split
  // of the plain integer forms at the bottom.
  bool IsNeg = false;
  if (V < 0) {
    IsNeg = true;
    V = -V;
  }

  unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);

  // MVE: size * imm7
  if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
    switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
    case MVT::i32:
    case MVT::f32:
      return isShiftedUInt<7,2>(V);
    case MVT::i16:
    case MVT::f16:
      return isShiftedUInt<7,1>(V);
    case MVT::i8:
      return isUInt<7>(V);
    default:
      return false;
    }
  }

  // half VLDR: 2 * imm8
  if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
    return isShiftedUInt<8, 1>(V);
  // VLDR and LDRD: 4 * imm8
  if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
    return isShiftedUInt<8, 2>(V);

  // Plain integer LDR/LDRB/LDRH.
  if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
    // + imm12 or - imm8
    if (IsNeg)
      return isUInt<8>(V);
    return isUInt<12>(V);
  }

  return false;
}
19363 
19364 /// isLegalAddressImmediate - Return true if the integer value can be used
19365 /// as the offset of the target addressing mode for load / store of the
19366 /// given type.
19367 static bool isLegalAddressImmediate(int64_t V, EVT VT,
19368  const ARMSubtarget *Subtarget) {
19369  if (V == 0)
19370  return true;
19371 
19372  if (!VT.isSimple())
19373  return false;
19374 
19375  if (Subtarget->isThumb1Only())
19376  return isLegalT1AddressImmediate(V, VT);
19377  else if (Subtarget->isThumb2())
19378  return isLegalT2AddressImmediate(V, VT, Subtarget);
19379 
19380  // ARM mode.
19381  if (V < 0)
19382  V = - V;
19383  switch (VT.getSimpleVT().SimpleTy) {
19384  default: return false;
19385  case MVT::i1:
19386  case MVT::i8:
19387  case MVT::i32:
19388  // +- imm12
19389  return isUInt<12>(V);
19390  case MVT::i16:
19391  // +- imm8
19392  return isUInt<8>(V);
19393  case MVT::f32:
19394  case MVT::f64:
19395  if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19396  return false;
19397  return isShiftedUInt<8, 2>(V);
19398  }
19399 }
19400 
19402  EVT VT) const {
19403  int Scale = AM.Scale;
19404  if (Scale < 0)
19405  return false;
19406 
19407  switch (VT.getSimpleVT().SimpleTy) {
19408  default: return false;
19409  case MVT::i1:
19410  case MVT::i8:
19411  case MVT::i16:
19412  case MVT::i32:
19413  if (Scale == 1)
19414  return true;
19415  // r + r << imm
19416  Scale = Scale & ~1;
19417  return Scale == 2 || Scale == 4 || Scale == 8;
19418  case MVT::i64:
19419  // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19420  // version in Thumb mode.
19421  // r + r
19422  if (Scale == 1)
19423  return true;
19424  // r * 2 (this can be lowered to r + r).
19425  if (!AM.HasBaseReg && Scale == 2)
19426  return true;
19427  return false;
19428  case MVT::isVoid:
19429  // Note, we allow "void" uses (basically, uses that aren't loads or
19430  // stores), because arm allows folding a scale into many arithmetic
19431  // operations. This should be made more precise and revisited later.
19432 
19433  // Allow r << imm, but the imm has to be a multiple of two.
19434  if (Scale & 1) return false;
19435  return isPowerOf2_32(Scale);
19436  }
19437 }
19438 
19440  EVT VT) const {
19441  const int Scale = AM.Scale;
19442 
19443  // Negative scales are not supported in Thumb1.
19444  if (Scale < 0)
19445  return false;
19446 
19447  // Thumb1 addressing modes do not support register scaling excepting the
19448  // following cases:
19449  // 1. Scale == 1 means no scaling.
19450  // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19451  return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19452 }
19453 
19454 /// isLegalAddressingMode - Return true if the addressing mode represented
19455 /// by AM is legal for this target, for a load/store of the specified type.
19457  const AddrMode &AM, Type *Ty,
19458  unsigned AS, Instruction *I) const {
19459  EVT VT = getValueType(DL, Ty, true);
19460  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19461  return false;
19462 
19463  // Can never fold addr of global into load/store.
19464  if (AM.BaseGV)
19465  return false;
19466 
19467  switch (AM.Scale) {
19468  case 0: // no scale reg, must be "r+i" or "r", or "i".
19469  break;
19470  default:
19471  // ARM doesn't support any R+R*scale+imm addr modes.
19472  if (AM.BaseOffs)
19473  return false;
19474 
19475  if (!VT.isSimple())
19476  return false;
19477 
19478  if (Subtarget->isThumb1Only())
19479  return isLegalT1ScaledAddressingMode(AM, VT);
19480 
19481  if (Subtarget->isThumb2())
19482  return isLegalT2ScaledAddressingMode(AM, VT);
19483 
19484  int Scale = AM.Scale;
19485  switch (VT.getSimpleVT().SimpleTy) {
19486  default: return false;
19487  case MVT::i1:
19488  case MVT::i8:
19489  case MVT::i32:
19490  if (Scale < 0) Scale = -Scale;
19491  if (Scale == 1)
19492  return true;
19493  // r + r << imm
19494  return isPowerOf2_32(Scale & ~1);
19495  case MVT::i16:
19496  case MVT::i64:
19497  // r +/- r
19498  if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19499  return true;
19500  // r * 2 (this can be lowered to r + r).
19501  if (!AM.HasBaseReg && Scale == 2)
19502  return true;
19503  return false;
19504 
19505  case MVT::isVoid:
19506  // Note, we allow "void" uses (basically, uses that aren't loads or
19507  // stores), because arm allows folding a scale into many arithmetic
19508  // operations. This should be made more precise and revisited later.
19509 
19510  // Allow r << imm, but the imm has to be a multiple of two.
19511  if (Scale & 1) return false;
19512  return isPowerOf2_32(Scale);
19513  }
19514  }
19515  return true;
19516 }
19517 
19518 /// isLegalICmpImmediate - Return true if the specified immediate is legal
19519 /// icmp immediate, that is the target has icmp instructions which can compare
19520 /// a register against the immediate without having to materialize the
19521 /// immediate into a register.
19523  // Thumb2 and ARM modes can use cmn for negative immediates.
19524  if (!Subtarget->isThumb())
19525  return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19526  ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19527  if (Subtarget->isThumb2())
19528  return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19530  // Thumb1 doesn't have cmn, and only 8-bit immediates.
19531  return Imm >= 0 && Imm <= 255;
19532 }
19533 
19534 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
19535 /// *or sub* immediate, that is the target has add or sub instructions which can
19536 /// add a register with the immediate without having to materialize the
19537 /// immediate into a register.
19539  // Same encoding for add/sub, just flip the sign.
19540  int64_t AbsImm = std::abs(Imm);
19541  if (!Subtarget->isThumb())
19542  return ARM_AM::getSOImmVal(AbsImm) != -1;
19543  if (Subtarget->isThumb2())
19544  return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19545  // Thumb1 only has 8-bit unsigned immediate.
19546  return AbsImm >= 0 && AbsImm <= 255;
19547 }
19548 
19549 // Return false to prevent folding
19550 // (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19551 // if the folding leads to worse code.
19553  SDValue ConstNode) const {
19554  // Let the DAGCombiner decide for vector types and large types.
19555  const EVT VT = AddNode.getValueType();
19556  if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19557  return true;
19558 
19559  // It is worse if c0 is legal add immediate, while c1*c0 is not
19560  // and has to be composed by at least two instructions.
19561  const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19562  const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19563  const int64_t C0 = C0Node->getSExtValue();
19564  APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19566  return true;
19567  if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19568  return false;
19569 
19570  // Default to true and let the DAGCombiner decide.
19571  return true;
19572 }
19573 
19575  bool isSEXTLoad, SDValue &Base,
19576  SDValue &Offset, bool &isInc,
19577  SelectionDAG &DAG) {
19578  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19579  return false;
19580 
19581  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19582  // AddressingMode 3
19583  Base = Ptr->getOperand(0);
19584  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19585  int RHSC = (int)RHS->getZExtValue();
19586  if (RHSC < 0 && RHSC > -256) {
19587  assert(Ptr->getOpcode() == ISD::ADD);
19588  isInc = false;
19589  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19590  return true;
19591  }
19592  }
19593  isInc = (Ptr->getOpcode() == ISD::ADD);
19594  Offset = Ptr->getOperand(1);
19595  return true;
19596  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19597  // AddressingMode 2
19598  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19599  int RHSC = (int)RHS->getZExtValue();
19600  if (RHSC < 0 && RHSC > -0x1000) {
19601  assert(Ptr->getOpcode() == ISD::ADD);
19602  isInc = false;
19603  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19604  Base = Ptr->getOperand(0);
19605  return true;
19606  }
19607  }
19608 
19609  if (Ptr->getOpcode() == ISD::ADD) {
19610  isInc = true;
19611  ARM_AM::ShiftOpc ShOpcVal=
19612  ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19613  if (ShOpcVal != ARM_AM::no_shift) {
19614  Base = Ptr->getOperand(1);
19615  Offset = Ptr->getOperand(0);
19616  } else {
19617  Base = Ptr->getOperand(0);
19618  Offset = Ptr->getOperand(1);
19619  }
19620  return true;
19621  }
19622 
19623  isInc = (Ptr->getOpcode() == ISD::ADD);
19624  Base = Ptr->getOperand(0);
19625  Offset = Ptr->getOperand(1);
19626  return true;
19627  }
19628 
19629  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19630  return false;
19631 }
19632 
19634  bool isSEXTLoad, SDValue &Base,
19635  SDValue &Offset, bool &isInc,
19636  SelectionDAG &DAG) {
19637  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19638  return false;
19639 
19640  Base = Ptr->getOperand(0);
19641  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19642  int RHSC = (int)RHS->getZExtValue();
19643  if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19644  assert(Ptr->getOpcode() == ISD::ADD);
19645  isInc = false;
19646  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19647  return true;
19648  } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19649  isInc = Ptr->getOpcode() == ISD::ADD;
19650  Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19651  return true;
19652  }
19653  }
19654 
19655  return false;
19656 }
19657 
// Match an MVE pre/post-indexed addressing mode for a vector load/store of
// type VT addressed by Ptr (an ADD or SUB of a constant). On success fills
// in Base/Offset/isInc and returns true.
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
                                      bool isSEXTLoad, bool IsMasked, bool isLE,
                                      SDValue &Base, SDValue &Offset,
                                      bool &isInc, SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;
  if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
    return false;

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = isLE && !IsMasked;

  ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
  int RHSC = (int)RHS->getZExtValue();

  // Check RHSC against a signed, Scale-aligned imm7 window; a negative
  // offset on an ADD is turned into the decrementing form.
  auto IsInRange = [&](int RHSC, int Limit, int Scale) {
    if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
      return true;
    }
    return false;
  };

  // Try to find a matching instruction based on s/zext, Alignment, Offset and
  // (in BE/masked) type.
  Base = Ptr->getOperand(0);
  if (VT == MVT::v4i16) {
    if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
      return true;
  } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
    if (IsInRange(RHSC, 0x80, 1))
      return true;
  } else if (Alignment >= 4 &&
             (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
             IsInRange(RHSC, 0x80, 4))
    return true;
  else if (Alignment >= 2 &&
           (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
           IsInRange(RHSC, 0x80, 2))
    return true;
  else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
    return true;
  return false;
}
19710 
19711 /// getPreIndexedAddressParts - returns true by value, base pointer and
19712 /// offset pointer and addressing mode by reference if the node's address
19713 /// can be legally represented as pre-indexed load / store address.
19714 bool
19716  SDValue &Offset,
19717  ISD::MemIndexedMode &AM,
19718  SelectionDAG &DAG) const {
19719  if (Subtarget->isThumb1Only())
19720  return false;
19721 
19722  EVT VT;
19723  SDValue Ptr;
19724  Align Alignment;
19725  bool isSEXTLoad = false;
19726  bool IsMasked = false;
19727  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19728  Ptr = LD->getBasePtr();
19729  VT = LD->getMemoryVT();
19730  Alignment = LD->getAlign();
19731  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19732  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19733  Ptr = ST->getBasePtr();
19734  VT = ST->getMemoryVT();
19735  Alignment = ST->getAlign();
19736  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19737  Ptr = LD->getBasePtr();
19738  VT = LD->getMemoryVT();
19739  Alignment = LD->getAlign();
19740  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19741  IsMasked = true;
19742  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19743  Ptr = ST->getBasePtr();
19744  VT = ST->getMemoryVT();
19745  Alignment = ST->getAlign();
19746  IsMasked = true;
19747  } else
19748  return false;
19749 
19750  bool isInc;
19751  bool isLegal = false;
19752  if (VT.isVector())
19753  isLegal = Subtarget->hasMVEIntegerOps() &&
19755  Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19756  Subtarget->isLittle(), Base, Offset, isInc, DAG);
19757  else {
19758  if (Subtarget->isThumb2())
19759  isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19760  Offset, isInc, DAG);
19761  else
19762  isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19763  Offset, isInc, DAG);
19764  }
19765  if (!isLegal)
19766  return false;
19767 
19768  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19769  return true;
19770 }
19771 
19772 /// getPostIndexedAddressParts - returns true by value, base pointer and
19773 /// offset pointer and addressing mode by reference if this node can be
19774 /// combined with a load / store to form a post-indexed load / store.
19776  SDValue &Base,
19777  SDValue &Offset,
19778  ISD::MemIndexedMode &AM,
19779  SelectionDAG &DAG) const {
19780  EVT VT;
19781  SDValue Ptr;
19782  Align Alignment;
19783  bool isSEXTLoad = false, isNonExt;
19784  bool IsMasked = false;
19785  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19786  VT = LD->getMemoryVT();
19787  Ptr = LD->getBasePtr();
19788  Alignment = LD->getAlign();
19789  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19790  isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19791  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19792  VT = ST->getMemoryVT();
19793  Ptr = ST->getBasePtr();
19794  Alignment = ST->getAlign();
19795  isNonExt = !ST->isTruncatingStore();
19796  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19797  VT = LD->getMemoryVT();
19798  Ptr = LD->getBasePtr();
19799  Alignment = LD->getAlign();
19800  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19801  isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19802  IsMasked = true;
19803  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19804  VT = ST->getMemoryVT();
19805  Ptr = ST->getBasePtr();
19806  Alignment = ST->getAlign();
19807  isNonExt = !ST->isTruncatingStore();
19808  IsMasked = true;
19809  } else
19810  return false;
19811 
19812  if (Subtarget->isThumb1Only()) {
19813  // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19814  // must be non-extending/truncating, i32, with an offset of 4.
19815  assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19816  if (Op->getOpcode() != ISD::ADD || !isNonExt)
19817  return false;
19818  auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
19819  if (!RHS || RHS->getZExtValue() != 4)
19820  return false;
19821  if (Alignment < Align(4))
19822  return false;
19823 
19824  Offset = Op->getOperand(1);
19825  Base = Op->getOperand(0);
19826  AM = ISD::POST_INC;
19827  return true;
19828  }
19829 
19830  bool isInc;
19831  bool isLegal = false;
19832  if (VT.isVector())
19833  isLegal = Subtarget->hasMVEIntegerOps() &&
19834  getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
19835  Subtarget->isLittle(), Base, Offset,
19836  isInc, DAG);
19837  else {
19838  if (Subtarget->isThumb2())
19840  isInc, DAG);
19841  else
19843  isInc, DAG);
19844  }
19845  if (!isLegal)
19846  return false;
19847 
19848  if (Ptr != Base) {
19849  // Swap base ptr and offset to catch more post-index load / store when
19850  // it's legal. In Thumb2 mode, offset must be an immediate.
19851  if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
19852  !Subtarget->isThumb2())
19853  std::swap(Base, Offset);
19854 
19855  // Post-indexed load / store update the base pointer.
19856  if (Ptr != Base)
19857  return false;
19858  }
19859 
19860  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
19861  return true;
19862 }
19863 
19865  KnownBits &Known,
19866  const APInt &DemandedElts,
19867  const SelectionDAG &DAG,
19868  unsigned Depth) const {
19869  unsigned BitWidth = Known.getBitWidth();
19870  Known.resetAll();
19871  switch (Op.getOpcode()) {
19872  default: break;
19873  case ARMISD::ADDC:
19874  case ARMISD::ADDE:
19875  case ARMISD::SUBC:
19876  case ARMISD::SUBE:
19877  // Special cases when we convert a carry to a boolean.
19878  if (Op.getResNo() == 0) {
19879  SDValue LHS = Op.getOperand(0);
19880  SDValue RHS = Op.getOperand(1);
19881  // (ADDE 0, 0, C) will give us a single bit.
19882  if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
19883  isNullConstant(RHS)) {
19885  return;
19886  }
19887  }
19888  break;
19889  case ARMISD::CMOV: {
19890  // Bits are known zero/one if known on the LHS and RHS.
19891  Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
19892  if (Known.isUnknown())
19893  return;
19894 
19895  KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
19896  Known = KnownBits::commonBits(Known, KnownRHS);
19897  return;
19898  }
19899  case ISD::INTRINSIC_W_CHAIN: {
19900  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
19901  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
19902  switch (IntID) {
19903  default: return;
19904  case Intrinsic::arm_ldaex:
19905  case Intrinsic::arm_ldrex: {
19906  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
19907  unsigned MemBits = VT.getScalarSizeInBits();
19908  Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
19909  return;
19910  }
19911  }
19912  }
19913  case ARMISD::BFI: {
19914  // Conservatively, we can recurse down the first operand
19915  // and just mask out all affected bits.
19916  Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19917 
19918  // The operand to BFI is already a mask suitable for removing the bits it
19919  // sets.
19920  ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
19921  const APInt &Mask = CI->getAPIntValue();
19922  Known.Zero &= Mask;
19923  Known.One &= Mask;
19924  return;
19925  }
19926  case ARMISD::VGETLANEs:
19927  case ARMISD::VGETLANEu: {
19928  const SDValue &SrcSV = Op.getOperand(0);
19929  EVT VecVT = SrcSV.getValueType();
19930  assert(VecVT.isVector() && "VGETLANE expected a vector type");
19931  const unsigned NumSrcElts = VecVT.getVectorNumElements();
19932  ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
19933  assert(Pos->getAPIntValue().ult(NumSrcElts) &&
19934  "VGETLANE index out of bounds");
19935  unsigned Idx = Pos->getZExtValue();
19936  APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
19937  Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
19938 
19939  EVT VT = Op.getValueType();
19940  const unsigned DstSz = VT.getScalarSizeInBits();
19941  const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
19942  (void)SrcSz;
19943  assert(SrcSz == Known.getBitWidth());
19944  assert(DstSz > SrcSz);
19945  if (Op.getOpcode() == ARMISD::VGETLANEs)
19946  Known = Known.sext(DstSz);
19947  else {
19948  Known = Known.zext(DstSz);
19949  }
19950  assert(DstSz == Known.getBitWidth());
19951  break;
19952  }
19953  case ARMISD::VMOVrh: {
19954  KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
19955  assert(KnownOp.getBitWidth() == 16);
19956  Known = KnownOp.zext(32);
19957  break;
19958  }
19959  case ARMISD::CSINC:
19960  case ARMISD::CSINV:
19961  case ARMISD::CSNEG: {
19962  KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
19963  KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
19964 
19965  // The result is either:
19966  // CSINC: KnownOp0 or KnownOp1 + 1
19967  // CSINV: KnownOp0 or ~KnownOp1
19968  // CSNEG: KnownOp0 or KnownOp1 * -1
19969  if (Op.getOpcode() == ARMISD::CSINC)
19970  KnownOp1 = KnownBits::computeForAddSub(
19971  true, false, KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
19972  else if (Op.getOpcode() == ARMISD::CSINV)
19973  std::swap(KnownOp1.Zero, KnownOp1.One);
19974  else if (Op.getOpcode() == ARMISD::CSNEG)
19975  KnownOp1 = KnownBits::mul(
19976  KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
19977 
19978  Known = KnownBits::commonBits(KnownOp0, KnownOp1);
19979  break;
19980  }
19981  }
19982 }
19983 
// Try to replace the constant mask of an i32 AND with a cheaper-to-encode
// constant (uxtb/uxth-style masks, or small Thumb1 movs/bics immediates),
// given which bits the users of the AND actually demand.
// NOTE(review): the opening signature line (original line 19984, presumably
// `bool ARMTargetLowering::targetShrinkDemandedConstant(`) was dropped by the
// extraction — confirm against upstream before relying on this listing.
19985  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
19986  TargetLoweringOpt &TLO) const {
19987  // Delay optimization, so we don't have to deal with illegal types, or block
19988  // optimizations.
19989  if (!TLO.LegalOps)
19990  return false;
19991 
19992  // Only optimize AND for now.
19993  if (Op.getOpcode() != ISD::AND)
19994  return false;
19995 
19996  EVT VT = Op.getValueType();
19997 
19998  // Ignore vectors.
19999  if (VT.isVector())
20000  return false;
20001 
20002  assert(VT == MVT::i32 && "Unexpected integer type");
20003 
20004  // Make sure the RHS really is a constant.
20005  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20006  if (!C)
20007  return false;
20008 
20009  unsigned Mask = C->getZExtValue();
20010 
// ShrunkMask keeps only demanded mask bits (bits we must preserve as 1);
// ExpandedMask additionally sets every non-demanded bit (bits we may freely
// turn into 1). Any legal replacement mask must lie between the two.
20011  unsigned Demanded = DemandedBits.getZExtValue();
20012  unsigned ShrunkMask = Mask & Demanded;
20013  unsigned ExpandedMask = Mask | ~Demanded;
20014 
20015  // If the mask is all zeros, let the target-independent code replace the
20016  // result with zero.
20017  if (ShrunkMask == 0)
20018  return false;
20019 
20020  // If the mask is all ones, erase the AND. (Currently, the target-independent
20021  // code won't do this, so we have to do it explicitly to avoid an infinite
20022  // loop in obscure cases.)
20023  if (ExpandedMask == ~0U)
20024  return TLO.CombineTo(Op, Op.getOperand(0));
20025 
// A candidate mask is legal iff it covers all of ShrunkMask and sets no bit
// outside ExpandedMask.
20026  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20027  return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20028  };
// Rewrite the AND with NewMask (no-op if it already equals the current mask).
20029  auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20030  if (NewMask == Mask)
20031  return true;
20032  SDLoc DL(Op);
20033  SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20034  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20035  return TLO.CombineTo(Op, NewOp);
20036  };
20037 
20038  // Prefer uxtb mask.
20039  if (IsLegalMask(0xFF))
20040  return UseMask(0xFF);
20041 
20042  // Prefer uxth mask.
20043  if (IsLegalMask(0xFFFF))
20044  return UseMask(0xFFFF);
20045 
20046  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20047  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20048  if (ShrunkMask < 256)
20049  return UseMask(ShrunkMask);
20050 
20051  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20052  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20053  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20054  return UseMask(ExpandedMask);
20055 
20056  // Potential improvements:
20057  //
20058  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20059  // We could try to prefer Thumb1 immediates which can be lowered to a
20060  // two-instruction sequence.
20061  // We could try to recognize more legal ARM/Thumb2 immediates here.
20062 
20063  return false;
20064 }
20065 
// Target hook: simplify ARM-specific nodes based on which result bits are
// demanded. Handles the long-shift nodes (ASRL/LSRL) and VBICIMM, then defers
// to the generic TargetLowering implementation.
// NOTE(review): original line 20066 (presumably
// `bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(`) and line 20100
// (the head of the trailing `return TargetLowering::...(` call) were dropped
// by the extraction — confirm against upstream.
20067  SDValue Op, const APInt &OriginalDemandedBits,
20068  const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20069  unsigned Depth) const {
20070  unsigned Opc = Op.getOpcode();
20071 
20072  switch (Opc) {
20073  case ARMISD::ASRL:
20074  case ARMISD::LSRL: {
20075  // If this is result 0 and the other result is unused, see if the demand
20076  // bits allow us to shrink this long shift into a standard small shift in
20077  // the opposite direction.
20078  if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20079  isa<ConstantSDNode>(Op->getOperand(2))) {
20080  unsigned ShAmt = Op->getConstantOperandVal(2);
20081  if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20082  << (32 - ShAmt)))
20083  return TLO.CombineTo(
20084  Op, TLO.DAG.getNode(
20085  ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20086  TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20087  }
20088  break;
20089  }
20090  case ARMISD::VBICIMM: {
// If none of the bits cleared by the VBIC immediate are demanded, the VBIC
// is a no-op for the users and can be replaced by its input.
20091  SDValue Op0 = Op.getOperand(0);
20092  unsigned ModImm = Op.getConstantOperandVal(1);
20093  unsigned EltBits = 0;
20094  uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20095  if ((OriginalDemandedBits & Mask) == 0)
20096  return TLO.CombineTo(Op, Op0);
20097  }
20098  }
20099 
20101  Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20102 }
20103 
20104 //===----------------------------------------------------------------------===//
20105 // ARM Inline Assembly Support
20106 //===----------------------------------------------------------------------===//
20107 
// Recognize the single-statement inline asm "rev $0, $1" (byte swap of an
// i32) with constraint string "=l,l" and expand it to the bswap intrinsic so
// the compiler can reason about it.
// NOTE(review): original line 20108 (presumably
// `bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {`) and line
// 20131 (presumably `return IntrinsicLowering::LowerToByteSwap(CI);`) were
// dropped by the extraction — confirm against upstream.
20109  // Looking for "rev" which is V6+.
20110  if (!Subtarget->hasV6Ops())
20111  return false;
20112 
20113  InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20114  std::string AsmStr = IA->getAsmString();
20115  SmallVector<StringRef, 4> AsmPieces;
20116  SplitString(AsmStr, AsmPieces, ";\n");
20117 
20118  switch (AsmPieces.size()) {
20119  default: return false;
20120  case 1:
20121  AsmStr = std::string(AsmPieces[0]);
20122  AsmPieces.clear();
20123  SplitString(AsmStr, AsmPieces, " \t,");
20124 
20125  // rev $0, $1
20126  if (AsmPieces.size() == 3 &&
20127  AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20128  IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20129  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20130  if (Ty && Ty->getBitWidth() == 32)
20132  }
20133  break;
20134  }
20135 
20136  return false;
20137 }
20138 
20139 const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20140  // At this point, we have to lower this constraint to something else, so we
20141  // lower it to an "r" or "w". However, by doing this we will force the result
20142  // to be in register, while the X constraint is much more permissive.
20143  //
20144  // Although we are correct (we are free to emit anything, without
20145  // constraints), we might break use cases that would expect us to be more
20146  // efficient and emit something else.
20147  if (!Subtarget->hasVFP2Base())
20148  return "r";
20149  if (ConstraintVT.isFloatingPoint())
20150  return "w";
20151  if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20152  (ConstraintVT.getSizeInBits() == 64 ||
20153  ConstraintVT.getSizeInBits() == 128))
20154  return "w";
20155 
20156  return "r";
20157 }
20158 
20159 /// getConstraintType - Given a constraint letter, return the type of
20160 /// constraint it is for this target.
// NOTE(review): original lines 20161-20162 (presumably
// `ARMTargetLowering::ConstraintType` /
// `ARMTargetLowering::getConstraintType(StringRef Constraint) const {`) were
// dropped by the extraction — confirm against upstream.
20163  unsigned S = Constraint.size();
20164  if (S == 1) {
20165  switch (Constraint[0]) {
20166  default: break;
20167  case 'l': return C_RegisterClass;
20168  case 'w': return C_RegisterClass;
20169  case 'h': return C_RegisterClass;
20170  case 'x': return C_RegisterClass;
20171  case 't': return C_RegisterClass;
20172  case 'j': return C_Immediate; // Constant for movw.
20173  // An address with a single base register. Due to the way we
20174  // currently handle addresses it is the same as an 'r' memory constraint.
20175  case 'Q': return C_Memory;
20176  }
20177  } else if (S == 2) {
20178  switch (Constraint[0]) {
20179  default: break;
20180  case 'T': return C_RegisterClass;
20181  // All 'U+' constraints are addresses.
20182  case 'U': return C_Memory;
20183  }
20184  }
// Anything unrecognized is classified by the generic implementation.
20185  return TargetLowering::getConstraintType(Constraint);
20186 }
20187 
20188 /// Examine constraint type and operand type and determine a weight value.
20189 /// This object must already have been set up with the operand type
20190 /// and the current alternative constraint selected.
// NOTE(review): original lines 20191-20192 (the function signature, presumably
// `TargetLowering::ConstraintWeight` /
// `ARMTargetLowering::getSingleConstraintMatchWeight(`) and line 20204 (the
// default-case call into `TargetLowering::getSingleConstraintMatchWeight`)
// were dropped by the extraction — confirm against upstream.
20193  AsmOperandInfo &info, const char *constraint) const {
20194  ConstraintWeight weight = CW_Invalid;
20195  Value *CallOperandVal = info.CallOperandVal;
20196  // If we don't have a value, we can't do a match,
20197  // but allow it at the lowest weight.
20198  if (!CallOperandVal)
20199  return CW_Default;
20200  Type *type = CallOperandVal->getType();
20201  // Look at the constraint type.
20202  switch (*constraint) {
20203  default:
20205  break;
20206  case 'l':
// 'l' (low registers) is only meaningfully specific in Thumb mode.
20207  if (type->isIntegerTy()) {
20208  if (Subtarget->isThumb())
20209  weight = CW_SpecificReg;
20210  else
20211  weight = CW_Register;
20212  }
20213  break;
20214  case 'w':
// 'w' is a VFP register — only weight it for floating-point operands.
20215  if (type->isFloatingPointTy())
20216  weight = CW_Register;
20217  break;
20218  }
20219  return weight;
20220 }
20221 
20222 using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20223 
// Map an inline-asm register constraint (plus the operand's value type) onto
// a concrete ARM register class, or a specific register for "{cc}".
// NOTE(review): original line 20224 (the function signature, presumably
// `RCPair ARMTargetLowering::getRegForInlineAsmConstraint(`) was dropped by
// the extraction — confirm against upstream.
20225  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20226  switch (Constraint.size()) {
20227  case 1:
20228  // GCC ARM Constraint Letters
20229  switch (Constraint[0]) {
20230  case 'l': // Low regs or general regs.
20231  if (Subtarget->isThumb())
20232  return RCPair(0U, &ARM::tGPRRegClass);
20233  return RCPair(0U, &ARM::GPRRegClass);
20234  case 'h': // High regs or no regs.
20235  if (Subtarget->isThumb())
20236  return RCPair(0U, &ARM::hGPRRegClass);
20237  break;
20238  case 'r':
20239  if (Subtarget->isThumb1Only())
20240  return RCPair(0U, &ARM::tGPRRegClass);
20241  return RCPair(0U, &ARM::GPRRegClass);
20242  case 'w':
// 'w': any VFP register, sized by the operand type (S/D/Q).
20243  if (VT == MVT::Other)
20244  break;
20245  if (VT == MVT::f16 || VT == MVT::bf16)
20246  return RCPair(0U, &ARM::HPRRegClass);
20247  if (VT == MVT::f32)
20248  return RCPair(0U, &ARM::SPRRegClass);
20249  if (VT.getSizeInBits() == 64)
20250  return RCPair(0U, &ARM::DPRRegClass);
20251  if (VT.getSizeInBits() == 128)
20252  return RCPair(0U, &ARM::QPRRegClass);
20253  break;
20254  case 'x':
// 'x': restricted to the low VFP registers (the *_8 classes).
20255  if (VT == MVT::Other)
20256  break;
20257  if (VT == MVT::f32)
20258  return RCPair(0U, &ARM::SPR_8RegClass);
20259  if (VT.getSizeInBits() == 64)
20260  return RCPair(0U, &ARM::DPR_8RegClass);
20261  if (VT.getSizeInBits() == 128)
20262  return RCPair(0U, &ARM::QPR_8RegClass);
20263  break;
20264  case 't':
// 't': VFP2-accessible registers.
20265  if (VT == MVT::Other)
20266  break;
20267  if (VT == MVT::f16 || VT == MVT::bf16)
20268  return RCPair(0U, &ARM::HPRRegClass);
20269  if (VT == MVT::f32 || VT == MVT::i32)
20270  return RCPair(0U, &ARM::SPRRegClass);
20271  if (VT.getSizeInBits() == 64)
20272  return RCPair(0U, &ARM::DPR_VFP2RegClass);
20273  if (VT.getSizeInBits() == 128)
20274  return RCPair(0U, &ARM::QPR_VFP2RegClass);
20275  break;
20276  }
20277  break;
20278 
20279  case 2:
// Two-letter 'Te' / 'To' constraints: even / odd Thumb GPRs.
20280  if (Constraint[0] == 'T') {
20281  switch (Constraint[1]) {
20282  default:
20283  break;
20284  case 'e':
20285  return RCPair(0U, &ARM::tGPREvenRegClass);
20286  case 'o':
20287  return RCPair(0U, &ARM::tGPROddRegClass);
20288  }
20289  }
20290  break;
20291 
20292  default:
20293  break;
20294  }
20295 
20296  if (StringRef("{cc}").equals_insensitive(Constraint))
20297  return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20298 
20299  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20300 }
20301 
20302 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20303 /// vector. If it is invalid, don't add anything to Ops.
// NOTE(review): original line 20304 (the function signature, presumably
// `void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,`) was
// dropped by the extraction — confirm against upstream.
// Each immediate-constraint letter below validates the constant against the
// ranges the corresponding ARM/Thumb instruction encodings accept; a `break`
// out of the inner switch accepts the value, a `return` rejects the operand.
20305  std::string &Constraint,
20306  std::vector<SDValue>&Ops,
20307  SelectionDAG &DAG) const {
20308  SDValue Result;
20309 
20310  // Currently only support length 1 constraints.
20311  if (Constraint.length() != 1) return;
20312 
20313  char ConstraintLetter = Constraint[0];
20314  switch (ConstraintLetter) {
20315  default: break;
20316  case 'j':
20317  case 'I': case 'J': case 'K': case 'L':
20318  case 'M': case 'N': case 'O':
20319  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20320  if (!C)
20321  return;
20322 
20323  int64_t CVal64 = C->getSExtValue();
20324  int CVal = (int) CVal64;
20325  // None of these constraints allow values larger than 32 bits. Check
20326  // that the value fits in an int.
20327  if (CVal != CVal64)
20328  return;
20329 
20330  switch (ConstraintLetter) {
20331  case 'j':
20332  // Constant suitable for movw, must be between 0 and
20333  // 65535.
20334  if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20335  if (CVal >= 0 && CVal <= 65535)
20336  break;
20337  return;
20338  case 'I':
20339  if (Subtarget->isThumb1Only()) {
20340  // This must be a constant between 0 and 255, for ADD
20341  // immediates.
20342  if (CVal >= 0 && CVal <= 255)
20343  break;
20344  } else if (Subtarget->isThumb2()) {
20345  // A constant that can be used as an immediate value in a
20346  // data-processing instruction.
20347  if (ARM_AM::getT2SOImmVal(CVal) != -1)
20348  break;
20349  } else {
20350  // A constant that can be used as an immediate value in a
20351  // data-processing instruction.
20352  if (ARM_AM::getSOImmVal(CVal) != -1)
20353  break;
20354  }
20355  return;
20356 
20357  case 'J':
20358  if (Subtarget->isThumb1Only()) {
20359  // This must be a constant between -255 and -1, for negated ADD
20360  // immediates. This can be used in GCC with an "n" modifier that
20361  // prints the negated value, for use with SUB instructions. It is
20362  // not useful otherwise but is implemented for compatibility.
20363  if (CVal >= -255 && CVal <= -1)
20364  break;
20365  } else {
20366  // This must be a constant between -4095 and 4095. It is not clear
20367  // what this constraint is intended for. Implemented for
20368  // compatibility with GCC.
20369  if (CVal >= -4095 && CVal <= 4095)
20370  break;
20371  }
20372  return;
20373 
20374  case 'K':
20375  if (Subtarget->isThumb1Only()) {
20376  // A 32-bit value where only one byte has a nonzero value. Exclude
20377  // zero to match GCC. This constraint is used by GCC internally for
20378  // constants that can be loaded with a move/shift combination.
20379  // It is not useful otherwise but is implemented for compatibility.
20380  if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20381  break;
20382  } else if (Subtarget->isThumb2()) {
20383  // A constant whose bitwise inverse can be used as an immediate
20384  // value in a data-processing instruction. This can be used in GCC
20385  // with a "B" modifier that prints the inverted value, for use with
20386  // BIC and MVN instructions. It is not useful otherwise but is
20387  // implemented for compatibility.
20388  if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20389  break;
20390  } else {
20391  // A constant whose bitwise inverse can be used as an immediate
20392  // value in a data-processing instruction. This can be used in GCC
20393  // with a "B" modifier that prints the inverted value, for use with
20394  // BIC and MVN instructions. It is not useful otherwise but is
20395  // implemented for compatibility.
20396  if (ARM_AM::getSOImmVal(~CVal) != -1)
20397  break;
20398  }
20399  return;
20400 
20401  case 'L':
20402  if (Subtarget->isThumb1Only()) {
20403  // This must be a constant between -7 and 7,
20404  // for 3-operand ADD/SUB immediate instructions.
20405  if (CVal >= -7 && CVal < 7)
20406  break;
20407  } else if (Subtarget->isThumb2()) {
20408  // A constant whose negation can be used as an immediate value in a
20409  // data-processing instruction. This can be used in GCC with an "n"
20410  // modifier that prints the negated value, for use with SUB
20411  // instructions. It is not useful otherwise but is implemented for
20412  // compatibility.
20413  if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20414  break;
20415  } else {
20416  // A constant whose negation can be used as an immediate value in a
20417  // data-processing instruction. This can be used in GCC with an "n"
20418  // modifier that prints the negated value, for use with SUB
20419  // instructions. It is not useful otherwise but is implemented for
20420  // compatibility.
20421  if (ARM_AM::getSOImmVal(-CVal) != -1)
20422  break;
20423  }
20424  return;
20425 
20426  case 'M':
20427  if (Subtarget->isThumb1Only()) {
20428  // This must be a multiple of 4 between 0 and 1020, for
20429  // ADD sp + immediate.
20430  if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20431  break;
20432  } else {
20433  // A power of two or a constant between 0 and 32. This is used in
20434  // GCC for the shift amount on shifted register operands, but it is
20435  // useful in general for any shift amounts.
20436  if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20437  break;
20438  }
20439  return;
20440 
20441  case 'N':
20442  if (Subtarget->isThumb1Only()) {
20443  // This must be a constant between 0 and 31, for shift amounts.
20444  if (CVal >= 0 && CVal <= 31)
20445  break;
20446  }
20447  return;
20448 
20449  case 'O':
20450  if (Subtarget->isThumb1Only()) {
20451  // This must be a multiple of 4 between -508 and 508, for
20452  // ADD/SUB sp = sp + immediate.
20453  if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20454  break;
20455  }
20456  return;
20457  }
20458  Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20459  break;
20460  }
20461 
20462  if (Result.getNode()) {
20463  Ops.push_back(Result);
20464  return;
20465  }
20466  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20467 }
20468 
// Select the RTLIB divmod libcall matching the node's signedness and the
// integer width (i8/i16/i32/i64).
// NOTE(review): original line 20469 (the signature head, presumably
// `static RTLIB::Libcall getDivRemLibcall(`) was dropped by the extraction —
// confirm against upstream.
20470  const SDNode *N, MVT::SimpleValueType SVT) {
20471  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20472  N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20473  "Unhandled Opcode in getDivRemLibcall");
20474  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20475  N->getOpcode() == ISD::SREM;
20476  RTLIB::Libcall LC;
20477  switch (SVT) {
20478  default: llvm_unreachable("Unexpected request for libcall!");
20479  case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20480  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20481  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20482  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20483  }
20484  return LC;
20485 }
20486 
// Build the argument list for a divmod libcall from the node's operands,
// applying sign/zero extension according to the operation's signedness.
// NOTE(review): original line 20487 (the signature head, presumably
// `static TargetLowering::ArgListTy getDivRemArgList(`) and line 20494
// (presumably `TargetLowering::ArgListTy Args;`) were dropped by the
// extraction — confirm against upstream.
20488  const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20489  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20490  N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20491  "Unhandled Opcode in getDivRemArgList");
20492  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20493  N->getOpcode() == ISD::SREM;
20495  TargetLowering::ArgListEntry Entry;
20496  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20497  EVT ArgVT = N->getOperand(i).getValueType();
20498  Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20499  Entry.Node = N->getOperand(i);
20500  Entry.Ty = ArgTy;
20501  Entry.IsSExt = isSigned;
20502  Entry.IsZExt = !isSigned;
20503  Args.push_back(Entry);
20504  }
// Windows divmod helpers take their arguments in the opposite order.
20505  if (Subtarget->isTargetWindows() && Args.size() >= 2)
20506  std::swap(Args[0], Args[1]);
20507  return Args;
20508 }
20509 
// Lower [SU]DIVREM: expand i64 divrem-by-constant inline, use hardware
// divide + MLS when available for i32, otherwise emit the AEABI/Windows
// divmod libcall that returns {div, rem}.
// NOTE(review): several lines were dropped from this listing by the
// extraction (original 20523, presumably `SmallVector<SDValue> Result;`;
// 20560/20564 heading the `getDivRemArgList`/`DAG.getExternalSymbol` calls;
// 20572, presumably `TargetLowering::CallLoweringInfo CLI(DAG);`; and 20575,
// the `.setSExtResult(...).setZExtResult(...)` line) — confirm upstream.
20510 SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20511  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20512  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20513  Subtarget->isTargetWindows()) &&
20514  "Register-based DivRem lowering only");
20515  unsigned Opcode = Op->getOpcode();
20516  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20517  "Invalid opcode for Div/Rem lowering");
20518  bool isSigned = (Opcode == ISD::SDIVREM);
20519  EVT VT = Op->getValueType(0);
20520  SDLoc dl(Op);
20521 
// i64 divrem by a constant can be expanded into i32 pieces; reassemble the
// four i32 results into two i64 values (div, rem).
20522  if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20524  if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20525  SDValue Res0 =
20526  DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20527  SDValue Res1 =
20528  DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20529  return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20530  {Res0, Res1});
20531  }
20532  }
20533 
20534  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20535 
20536  // If the target has hardware divide, use divide + multiply + subtract:
20537  // div = a / b
20538  // rem = a - b * div
20539  // return {div, rem}
20540  // This should be lowered into UDIV/SDIV + MLS later on.
20541  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20542  : Subtarget->hasDivideInARMMode();
20543  if (hasDivide && Op->getValueType(0).isSimple() &&
20544  Op->getSimpleValueType(0) == MVT::i32) {
20545  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20546  const SDValue Dividend = Op->getOperand(0);
20547  const SDValue Divisor = Op->getOperand(1);
20548  SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20549  SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20550  SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20551 
20552  SDValue Values[2] = {Div, Rem};
20553  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20554  }
20555 
// Otherwise, call the divmod runtime helper; its struct return carries both
// quotient and remainder.
20556  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20557  VT.getSimpleVT().SimpleTy);
20558  SDValue InChain = DAG.getEntryNode();
20559 
20561  DAG.getContext(),
20562  Subtarget);
20563 
20565  getPointerTy(DAG.getDataLayout()));
20566 
20567  Type *RetTy = StructType::get(Ty, Ty);
20568 
// Windows requires an explicit divide-by-zero check before the helper call.
20569  if (Subtarget->isTargetWindows())
20570  InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20571 
20573  CLI.setDebugLoc(dl).setChain(InChain)
20574  .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20576 
20577  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20578  return CallInfo.first;
20579 }
20580 
20581 // Lowers REM using divmod helpers
20582 // see RTABI section 4.2/4.3
// NOTE(review): several lines were dropped from this listing by the
// extraction (original 20587, presumably `SmallVector<SDValue> Result;`;
// 20613/20616 heading the `getDivRemArgList`/`DAG.getExternalSymbol` calls;
// and 20626, the `.setSExtResult(...).setZExtResult(...)` line) — confirm
// against upstream.
20583 SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20584  EVT VT = N->getValueType(0);
20585 
// i64 rem by a constant expands inline; rebuild the i64 from two i32 halves.
20586  if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20588  if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20589  return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20590  Result[0], Result[1]);
20591  }
20592 
20593  // Build return types (div and rem)
20594  std::vector<Type*> RetTyParams;
20595  Type *RetTyElement;
20596 
20597  switch (VT.getSimpleVT().SimpleTy) {
20598  default: llvm_unreachable("Unexpected request for libcall!");
20599  case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20600  case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20601  case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20602  case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20603  }
20604 
20605  RetTyParams.push_back(RetTyElement);
20606  RetTyParams.push_back(RetTyElement);
20607  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20608  Type *RetTy = StructType::get(*DAG.getContext(), ret);
20609 
20610  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20611  SimpleTy);
20612  SDValue InChain = DAG.getEntryNode();
20614  Subtarget);
20615  bool isSigned = N->getOpcode() == ISD::SREM;
20617  getPointerTy(DAG.getDataLayout()));
20618 
// Windows requires an explicit divide-by-zero check before the helper call.
20619  if (Subtarget->isTargetWindows())
20620  InChain = WinDBZCheckDenominator(DAG, N, InChain);
20621 
20622  // Lower call
20623  CallLoweringInfo CLI(DAG);
20624  CLI.setChain(InChain)
20625  .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20627  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20628 
20629  // Return second (rem) result operand (first contains div)
20630  SDNode *ResNode = CallResult.first.getNode();
20631  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20632  return ResNode->getOperand(1);
20633 }
20634 
// Windows-only dynamic stack allocation: either adjust SP directly (when
// stack probing is disabled via "no-stack-arg-probe") or call __chkstk with
// the size in words passed in R4.
// NOTE(review): original line 20644 (presumably the
// `if (DAG.getMachineFunction().getFunction().hasFnAttribute(` head of the
// "no-stack-arg-probe" check) was dropped by the extraction — confirm
// against upstream.
20635 SDValue
20636 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20637  assert(Subtarget->isTargetWindows() && "unsupported target platform");
20638  SDLoc DL(Op);
20639 
20640  // Get the inputs.
20641  SDValue Chain = Op.getOperand(0);
20642  SDValue Size = Op.getOperand(1);
20643 
20645  "no-stack-arg-probe")) {
// No probing: subtract the size from SP and realign if requested.
20646  MaybeAlign Align =
20647  cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20648  SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20649  Chain = SP.getValue(1);
20650  SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20651  if (Align)
20652  SP =
20653  DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20654  DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20655  Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20656  SDValue Ops[2] = { SP, Chain };
20657  return DAG.getMergeValues(Ops, DL);
20658  }
20659 
// __chkstk expects the allocation size in 4-byte words, passed in R4.
20660  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20661  DAG.getConstant(2, DL, MVT::i32));
20662 
20663  SDValue Flag;
20664  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
20665  Flag = Chain.getValue(1);
20666 
20667  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20668  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
20669 
// __chkstk adjusts SP itself; read the new SP back as the result.
20670  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20671  Chain = NewSP.getValue(1);
20672 
20673  SDValue Ops[2] = { NewSP, Chain };
20674  return DAG.getMergeValues(Ops, DL);
20675 }
20676 
// Custom lowering for (STRICT_)FP_EXTEND when the conversion is not legal:
// performs the widening in one or two steps (f16->f32->f64), using a native
// node per step when the subtarget supports it and a libcall otherwise.
// NOTE(review): original line 20696 (presumably the
// `SDValue Result = DAG.getNode(ISD::FP_EXTEND,` head inside the strict
// 32->64 case) was dropped by the extraction — confirm against upstream.
20677 SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20678  bool IsStrict = Op->isStrictFPOpcode();
20679  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20680  const unsigned DstSz = Op.getValueType().getSizeInBits();
20681  const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20682  assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20683  "Unexpected type for custom-lowering FP_EXTEND");
20684 
20685  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20686  "With both FP DP and 16, any FP conversion is legal!");
20687 
20688  assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20689  "With FP16, 16 to 32 conversion is legal!");
20690 
20691  // Converting from 32 -> 64 is valid if we have FP64.
20692  if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20693  // FIXME: Remove this when we have strict fp instruction selection patterns
20694  if (IsStrict) {
20695  SDLoc Loc(Op);
20697  Loc, Op.getValueType(), SrcVal);
20698  return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20699  }
20700  return Op;
20701  }
20702 
20703  // Either we are converting from 16 -> 64, without FP16 and/or
20704  // FP.double-precision or without Armv8-fp. So we must do it in two
20705  // steps.
20706  // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20707  // without FP16. So we must do a function call.
20708  SDLoc Loc(Op);
20709  RTLIB::Libcall LC;
20710  MakeLibCallOptions CallOptions;
20711  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
// Widen one step at a time: f16->f32 on the first iteration, f32->f64 on the
// second, choosing native node vs. libcall per step.
20712  for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20713  bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20714  MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20715  MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20716  if (Supported) {
20717  if (IsStrict) {
20718  SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20719  {DstVT, MVT::Other}, {Chain, SrcVal});
20720  Chain = SrcVal.getValue(1);
20721  } else {
20722  SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20723  }
20724  } else {
20725  LC = RTLIB::getFPEXT(SrcVT, DstVT);
20726  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20727  "Unexpected type for custom-lowering FP_EXTEND");
20728  std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20729  Loc, Chain);
20730  }
20731  }
20732 
20733  return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20734 }
20735 
20736 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20737  bool IsStrict = Op->isStrictFPOpcode();
20738 
20739  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20740  EVT SrcVT = SrcVal.getValueType();
20741  EVT DstVT = Op.getValueType();
20742  const unsigned DstSz = Op.getValueType().getSizeInBits();
20743  const unsigned SrcSz = SrcVT.getSizeInBits();
20744  (void)DstSz;
20745  assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20746  "Unexpected type for custom-lowering FP_ROUND");
20747 
20748  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20749  "With both FP DP and 16, any FP conversion is legal!");
20750 
20751  SDLoc Loc(Op);
20752 
20753  // Instruction from 32 -> 16 if hasFP16 is valid
20754  if (SrcSz == 32 && Subtarget->hasFP16())
20755  return Op;
20756 
20757  // Lib call from 32 -> 16 / 64 -> [32, 16]
20758  RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20759  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20760  "Unexpected type for custom-lowering FP_ROUND");
20761  MakeLibCallOptions CallOptions;
20762  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20763  SDValue Result;
20764  std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20765  Loc, Chain);
20766  return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20767 }
20768 
// NOTE(review): original line 20770 (the signature, presumably
// `ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
// const {`) was dropped by the extraction — confirm against upstream.
20769 bool
20771  // The ARM target isn't yet aware of offsets.
20772  return false;
20773 }
20774 
// Return true if ~v is a single contiguous run of ones, i.e. v has ones on
// the outside(s) and a contiguous block of zeros inside — the mask shape a
// BFI/BFC bit-field instruction can clear.
// NOTE(review): original line 20775 (the signature, presumably
// `static bool isBitFieldInvertedMask(unsigned v) {`) was dropped by the
// extraction — confirm against upstream.
20776  if (v == 0xffffffff)
20777  return false;
20778 
20779  // there can be 1's on either or both "outsides", all the "inside"
20780  // bits must be 0's
20781  return isShiftedMask_32(~v);
20782 }
20783 
20784 /// isFPImmLegal - Returns true if the target can instruction select the
20785 /// specified FP immediate natively. If false, the legalizer will
20786 /// materialize the FP immediate as a load from a constant pool.
// NOTE(review): original line 20787 (the signature head, presumably
// `bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,`) was
// dropped by the extraction — confirm against upstream.
20788  bool ForCodeSize) const {
// VFPv3 introduced the fconst/vmov-immediate encodings; without it no FP
// immediate is selectable.
20789  if (!Subtarget->hasVFP3Base())
20790  return false;
20791  if (VT == MVT::f16 && Subtarget->hasFullFP16())
20792  return ARM_AM::getFP16Imm(Imm) != -1;
// An f32 value exactly representable as an f16 immediate is also legal when
// full FP16 is available.
20793  if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20794  ARM_AM::getFP32FP16Imm(Imm) != -1)
20795  return true;
20796  if (VT == MVT::f32)
20797  return ARM_AM::getFP32Imm(Imm) != -1;
20798  if (VT == MVT::f64 && Subtarget->hasFP64())
20799  return ARM_AM::getFP64Imm(Imm) != -1;
20800  return false;
20801 }
20802 
20803 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20804 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20805 /// specified in the intrinsic calls.
20807  const CallInst &I,
20808  MachineFunction &MF,
20809  unsigned Intrinsic) const {
20810  switch (Intrinsic) {
20811  case Intrinsic::arm_neon_vld1:
20812  case Intrinsic::arm_neon_vld2:
20813  case Intrinsic::arm_neon_vld3:
20814  case Intrinsic::arm_neon_vld4:
20815  case Intrinsic::arm_neon_vld2lane:
20816  case Intrinsic::arm_neon_vld3lane:
20817  case Intrinsic::arm_neon_vld4lane:
20818  case Intrinsic::arm_neon_vld2dup:
20819  case Intrinsic::arm_neon_vld3dup:
20820  case Intrinsic::arm_neon_vld4dup: {
20822  // Conservatively set memVT to the entire set of vectors loaded.
20823  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20824  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20825  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20826  Info.ptrVal = I.getArgOperand(0);
20827  Info.offset = 0;
20828  Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20829  Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20830  // volatile loads with NEON intrinsics not supported
20832  return true;
20833  }
20834  case Intrinsic::arm_neon_vld1x2:
20835  case Intrinsic::arm_neon_vld1x3:
20836  case Intrinsic::arm_neon_vld1x4: {
20838  // Conservatively set memVT to the entire set of vectors loaded.
20839  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20840  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20841  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20842  Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
20843  Info.offset = 0;
20844  Info.align.reset();
20845  // volatile loads with NEON intrinsics not supported
20847  return true;
20848  }
20849  case Intrinsic::arm_neon_vst1:
20850  case Intrinsic::arm_neon_vst2:
20851  case Intrinsic::arm_neon_vst3:
20852  case Intrinsic::arm_neon_vst4:
20853  case Intrinsic::arm_neon_vst2lane:
20854  case Intrinsic::arm_neon_vst3lane:
20855  case Intrinsic::arm_neon_vst4lane: {
20856  Info.opc = ISD::INTRINSIC_VOID;
20857  // Conservatively set memVT to the entire set of vectors stored.
20858  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20859  unsigned NumElts = 0;
20860  for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20861  Type *ArgTy = I.getArgOperand(ArgI)->getType();
20862  if (!ArgTy->isVectorTy())
20863  break;
20864  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20865  }
20866  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20867  Info.ptrVal = I.getArgOperand(0);
20868  Info.offset = 0;
20869  Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20870  Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20871  // volatile stores with NEON intrinsics not supported
20873  return true;
20874  }
20875  case Intrinsic::arm_neon_vst1x2:
20876  case Intrinsic::arm_neon_vst1x3:
20877  case Intrinsic::arm_neon_vst1x4: {
20878  Info.opc = ISD::INTRINSIC_VOID;
20879  // Conservatively set memVT to the entire set of vectors stored.
20880  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20881  unsigned NumElts = 0;
20882  for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20883  Type *ArgTy = I.getArgOperand(ArgI)->getType();
20884  if (!ArgTy->isVectorTy())
20885  break;
20886  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20887  }
20888  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20889  Info.ptrVal = I.getArgOperand(0);
20890  Info.offset = 0;
20891  Info.align.reset();
20892  // volatile stores with NEON intrinsics not supported
20894  return true;
20895  }
20896  case Intrinsic::arm_mve_vld2q:
20897  case Intrinsic::arm_mve_vld4q: {
20899  // Conservatively set memVT to the entire set of vectors loaded.
20900  Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
20901  unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
20902  Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
20903  Info.ptrVal = I.getArgOperand(0);
20904  Info.offset = 0;
20905  Info.align = Align(VecTy->getScalarSizeInBits() / 8);
20906  // volatile loads with MVE intrinsics not supported
20908  return true;
20909  }
20910  case Intrinsic::arm_mve_vst2q:
20911  case Intrinsic::arm_mve_vst4q: {
20912  Info.opc = ISD::INTRINSIC_VOID;
20913  // Conservatively set memVT to the entire set of vectors stored.
20914  Type *VecTy = I.getArgOperand(1)->getType();
20915  unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
20916  Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
20917  Info.ptrVal = I.getArgOperand(0);
20918  Info.offset = 0;
20919  Info.align = Align(VecTy->getScalarSizeInBits() / 8);
20920  // volatile stores with MVE intrinsics not supported
20922  return true;
20923  }
20924  case Intrinsic::arm_mve_vldr_gather_base:
20925  case Intrinsic::arm_mve_vldr_gather_base_predicated: {
20927  Info.ptrVal = nullptr;
20928  Info.memVT = MVT::getVT(I.getType());
20929  Info.align = Align(1);
20931  return true;
20932  }
20933  case Intrinsic::arm_mve_vldr_gather_base_wb:
20934  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
20936  Info.ptrVal = nullptr;
20937  Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
20938  Info.align = Align(1);
20940  return true;
20941  }
20942  case Intrinsic::arm_mve_vldr_gather_offset:
20943  case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
20945  Info.ptrVal = nullptr;
20946  MVT DataVT = MVT::getVT(I.getType());
20947  unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
20948  Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
20949  DataVT.getVectorNumElements());
20950  Info.align = Align(1);
20952  return true;
20953  }
20954  case Intrinsic::arm_mve_vstr_scatter_base:
20955  case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
20956  Info.opc = ISD::INTRINSIC_VOID;
20957  Info.ptrVal = nullptr;
20958  Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
20959  Info.align = Align(1);
20961  return true;
20962  }
20963  case Intrinsic::arm_mve_vstr_scatter_base_wb:
20964  case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
20966  Info.ptrVal = nullptr;
20967  Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
20968  Info.align = Align(1);
20970  return true;
20971  }
20972  case Intrinsic::arm_mve_vstr_scatter_offset:
20973  case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
20974  Info.opc = ISD::INTRINSIC_VOID;
20975  Info.ptrVal = nullptr;
20976  MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
20977  unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
20978  Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
20979  DataVT.getVectorNumElements());
20980  Info.align = Align(1);
20982  return true;
20983  }
20984  case Intrinsic::arm_ldaex:
20985  case Intrinsic::arm_ldrex: {
20986  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20987  Type *ValTy = I.getParamElementType(0);
20989  Info.memVT = MVT::getVT(ValTy);
20990  Info.ptrVal = I.getArgOperand(0);
20991  Info.offset = 0;
20992  Info.align = DL.getABITypeAlign(ValTy);
20994  return true;
20995  }
20996  case Intrinsic::arm_stlex:
20997  case Intrinsic::arm_strex: {
20998  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20999  Type *ValTy = I.getParamElementType(1);
21001  Info.memVT = MVT::getVT(ValTy);
21002  Info.ptrVal = I.getArgOperand(1);
21003  Info.offset = 0;
21004  Info.align = DL.getABITypeAlign(ValTy);
21006  return true;
21007  }
21008  case Intrinsic::arm_stlexd:
21009  case Intrinsic::arm_strexd:
21011  Info.memVT = MVT::i64;
21012  Info.ptrVal = I.getArgOperand(2);
21013  Info.offset = 0;
21014  Info.align = Align(8);
21016  return true;
21017 
21018  case Intrinsic::arm_ldaexd:
21019  case Intrinsic::arm_ldrexd:
21021  Info.memVT = MVT::i64;
21022  Info.ptrVal = I.getArgOperand(0);
21023  Info.offset = 0;
21024  Info.align = Align(8);
21026  return true;
21027 
21028  default:
21029  break;
21030  }
21031 
21032  return false;
21033 }
21034 
21035 /// Returns true if it is beneficial to convert a load of a constant
21036 /// to just the constant itself.
21038  Type *Ty) const {
21039  assert(Ty->isIntegerTy());
21040 
21041  unsigned Bits = Ty->getPrimitiveSizeInBits();
21042  if (Bits == 0 || Bits > 32)
21043  return false;
21044  return true;
21045 }
21046 
21048  unsigned Index) const {
21050  return false;
21051 
21052  return (Index == 0 || Index == ResVT.getVectorNumElements());
21053 }
21054 
21056  ARM_MB::MemBOpt Domain) const {
21057  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21058 
21059  // First, if the target has no DMB, see what fallback we can use.
21060  if (!Subtarget->hasDataBarrier()) {
21061  // Some ARMv6 cpus can support data barriers with an mcr instruction.
21062  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21063  // here.
21064  if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21065  Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
21066  Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21067  Builder.getInt32(0), Builder.getInt32(7),
21068  Builder.getInt32(10), Builder.getInt32(5)};
21069  return Builder.CreateCall(MCR, args);
21070  } else {
21071  // Instead of using barriers, atomic accesses on these subtargets use
21072  // libcalls.
21073  llvm_unreachable("makeDMB on a target so old that it has no barriers");
21074  }
21075  } else {
21076  Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
21077  // Only a full system barrier exists in the M-class architectures.
21078  Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21079  Constant *CDomain = Builder.getInt32(Domain);
21080  return Builder.CreateCall(DMB, CDomain);
21081  }
21082 }
21083 
21084 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21086  Instruction *Inst,
21087  AtomicOrdering Ord) const {
21088  switch (Ord) {
21091  llvm_unreachable("Invalid fence: unordered/non-atomic");
21094  return nullptr; // Nothing to do
21096  if (!Inst->hasAtomicStore())
21097  return nullptr; // Nothing to do
21098  [[fallthrough]];
21101  if (Subtarget->preferISHSTBarriers())
21102  return makeDMB(Builder, ARM_MB::ISHST);
21103  // FIXME: add a comment with a link to documentation justifying this.
21104  else
21105  return makeDMB(Builder, ARM_MB::ISH);
21106  }
21107  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21108 }
21109 
21111  Instruction *Inst,
21112  AtomicOrdering Ord) const {
21113  switch (Ord) {
21116  llvm_unreachable("Invalid fence: unordered/not-atomic");
21119  return nullptr; // Nothing to do
21123  return makeDMB(Builder, ARM_MB::ISH);
21124  }
21125  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21126 }
21127 
21128 // Loads and stores less than 64-bits are already atomic; ones above that
21129 // are doomed anyway, so defer to the default libcall and blame the OS when
21130 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21131 // anything for those.
21134  bool has64BitAtomicStore;
21135  if (Subtarget->isMClass())
21136  has64BitAtomicStore = false;
21137  else if (Subtarget->isThumb())
21138  has64BitAtomicStore = Subtarget->hasV7Ops();
21139  else
21140  has64BitAtomicStore = Subtarget->hasV6Ops();
21141 
21142  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21143  return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21145 }
21146 
21147 // Loads and stores less than 64-bits are already atomic; ones above that
21148 // are doomed anyway, so defer to the default libcall and blame the OS when
21149 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21150 // anything for those.
21151 // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21152 // guarantee, see DDI0406C ARM architecture reference manual,
21153 // sections A8.8.72-74 LDRD)
21156  bool has64BitAtomicLoad;
21157  if (Subtarget->isMClass())
21158  has64BitAtomicLoad = false;
21159  else if (Subtarget->isThumb())
21160  has64BitAtomicLoad = Subtarget->hasV7Ops();
21161  else
21162  has64BitAtomicLoad = Subtarget->hasV6Ops();
21163 
21164  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21165  return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21167 }
21168 
21169 // For the real atomic operations, we have ldrex/strex up to 32 bits,
21170 // and up to 64 bits on the non-M profiles
21173  if (AI->isFloatingPointOperation())
21175 
21176  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21177  bool hasAtomicRMW;
21178  if (Subtarget->isMClass())
21179  hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21180  else if (Subtarget->isThumb())
21181  hasAtomicRMW = Subtarget->hasV7Ops();
21182  else
21183  hasAtomicRMW = Subtarget->hasV6Ops();
21184  if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21185  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21186  // implement atomicrmw without spilling. If the target address is also on
21187  // the stack and close enough to the spill slot, this can lead to a
21188  // situation where the monitor always gets cleared and the atomic operation
21189  // can never succeed. So at -O0 lower this operation to a CAS loop.
21190  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
21193  }
21195 }
21196 
21197 // Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21198 // bits, and up to 64 bits on the non-M profiles.
21201  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21202  // implement cmpxchg without spilling. If the address being exchanged is also
21203  // on the stack and close enough to the spill slot, this can lead to a
21204  // situation where the monitor always gets cleared and the atomic operation
21205  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21206  unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21207  bool HasAtomicCmpXchg;
21208  if (Subtarget->isMClass())
21209  HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21210  else if (Subtarget->isThumb())
21211  HasAtomicCmpXchg = Subtarget->hasV7Ops();
21212  else
21213  HasAtomicCmpXchg = Subtarget->hasV6Ops();
21214  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
21215  Size <= (Subtarget->isMClass() ? 32U : 64U))
21218 }
21219 
21221  const Instruction *I) const {
21222  return InsertFencesForAtomic;
21223 }
21224 
21226  // ROPI/RWPI are not supported currently.
21227  return !Subtarget->isROPI() && !Subtarget->isRWPI();
21228 }
21229 
21231  if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21233 
21234  // MSVC CRT has a global variable holding security cookie.
21235  M.getOrInsertGlobal("__security_cookie",
21236  Type::getInt8PtrTy(M.getContext()));
21237 
21238  // MSVC CRT has a function to validate security cookie.
21239  FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21240  "__security_check_cookie", Type::getVoidTy(M.getContext()),
21241  Type::getInt8PtrTy(M.getContext()));
21242  if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21243  F->addParamAttr(0, Attribute::AttrKind::InReg);
21244 }
21245 
21247  // MSVC CRT has a global variable holding security cookie.
21248  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21249  return M.getGlobalVariable("__security_cookie");
21251 }
21252 
21254  // MSVC CRT has a function to validate security cookie.
21255  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21256  return M.getFunction("__security_check_cookie");
21258 }
21259 
21261  unsigned &Cost) const {
21262  // If we do not have NEON, vector types are not natively supported.
21263  if (!Subtarget->hasNEON())
21264  return false;
21265 
21266  // Floating point values and vector values map to the same register file.
21267  // Therefore, although we could do a store extract of a vector type, this is
21268  // better to leave at float as we have more freedom in the addressing mode for
21269  // those.
21270  if (VectorTy->isFPOrFPVectorTy())
21271  return false;
21272 
21273  // If the index is unknown at compile time, this is very expensive to lower
21274  // and it is not possible to combine the store with the extract.
21275  if (!isa<ConstantInt>(Idx))
21276  return false;
21277 
21278  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21279  unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21280  // We can do a store + vector extract on any vector that fits perfectly in a D
21281  // or Q register.
21282  if (BitWidth == 64 || BitWidth == 128) {
21283  Cost = 0;
21284  return true;
21285  }
21286  return false;
21287 }
21288 
21290  return Subtarget->hasV6T2Ops();
21291 }
21292 
21294  return Subtarget->hasV6T2Ops();
21295 }
21296 
21298  const Instruction &AndI) const {
21299  if (!Subtarget->hasV7Ops())
21300  return false;
21301 
21302  // Sink the `and` instruction only if the mask would fit into a modified
21303  // immediate operand.
21304  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21305  if (!Mask || Mask->getValue().getBitWidth() > 32u)
21306  return false;
21307  auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21308  return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21309  : ARM_AM::getSOImmVal(MaskVal)) != -1;
21310 }
21311 
21314  SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21315  if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21318  ExpansionFactor);
21319 }
21320 
21322  Value *Addr,
21323  AtomicOrdering Ord) const {
21324  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21325  bool IsAcquire = isAcquireOrStronger(Ord);
21326 
21327  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21328  // intrinsic must return {i32, i32} and we have to recombine them into a
21329  // single i64 here.
21330  if (ValueTy->getPrimitiveSizeInBits() == 64) {
21331  Intrinsic::ID Int =
21332  IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21334 
21335  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
21336  Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21337 
21338  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21339  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21340  if (!Subtarget->isLittle())
21341  std::swap (Lo, Hi);
21342  Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21343  Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21344  return Builder.CreateOr(
21345  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21346  }
21347 
21348  Type *Tys[] = { Addr->getType() };
21349  Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21350  Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21351  CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21352 
21353  CI->addParamAttr(
21354  0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21355  return Builder.CreateTruncOrBitCast(CI, ValueTy);
21356 }
21357 
21359  IRBuilderBase &Builder) const {
21360  if (!Subtarget->hasV7Ops())
21361  return;
21362  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21363  Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21364 }
21365 
21367  Value *Val, Value *Addr,
21368  AtomicOrdering Ord) const {
21369  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21370  bool IsRelease = isReleaseOrStronger(Ord);
21371 
21372  // Since the intrinsics must have legal type, the i64 intrinsics take two
21373  // parameters: "i32, i32". We must marshal Val into the appropriate form
21374  // before the call.
21375  if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21376  Intrinsic::ID Int =
21377  IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21379  Type *Int32Ty = Type::getInt32Ty(M->getContext());
21380 
21381  Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21382  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21383  if (!Subtarget->isLittle())
21384  std::swap(Lo, Hi);
21385  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
21386  return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21387  }
21388 
21389  Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21390  Type *Tys[] = { Addr->getType() };
21391  Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21392 
21393  CallInst *CI = Builder.CreateCall(
21394  Strex, {Builder.CreateZExtOrBitCast(
21395  Val, Strex->getFunctionType()->getParamType(0)),
21396  Addr});
21397  CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21398  Val->getType()));
21399  return CI;
21400 }
21401 
21402 
21404  return Subtarget->isMClass();
21405 }
21406 
21407 /// A helper function for determining the number of interleaved accesses we
21408 /// will generate when lowering accesses of the given type.
21409 unsigned
21411  const DataLayout &DL) const {
21412  return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21413 }
21414 
21416  unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21417  const DataLayout &DL) const {
21418 
21419  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21420  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21421 
21422  if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21423  return false;
21424 
21425  // Ensure the vector doesn't have f16 elements. Even though we could do an
21426  // i16 vldN, we can't hold the f16 vectors and will end up converting via
21427  // f32.
21428  if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21429  return false;
21430  if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21431  return false;
21432 
21433  // Ensure the number of vector elements is greater than 1.
21434  if (VecTy->getNumElements() < 2)
21435  return false;
21436 
21437  // Ensure the element type is legal.
21438  if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21439  return false;
21440  // And the alignment if high enough under MVE.
21441  if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21442  return false;
21443 
21444  // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21445  // 128 will be split into multiple interleaved accesses.
21446  if (Subtarget->hasNEON() && VecSize == 64)
21447  return true;
21448  return VecSize % 128 == 0;
21449 }
21450 
21452  if (Subtarget->hasNEON())
21453  return 4;
21454  if (Subtarget->hasMVEIntegerOps())
21457 }
21458 
21459 /// Lower an interleaved load into a vldN intrinsic.
21460 ///
21461 /// E.g. Lower an interleaved load (Factor = 2):
21462 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21463 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21464 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21465 ///
21466 /// Into:
21467 /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21468 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21469 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21472  ArrayRef<unsigned> Indices, unsigned Factor) const {
21473  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21474  "Invalid interleave factor");
21475  assert(!Shuffles.empty() && "Empty shufflevector input");
21476  assert(Shuffles.size() == Indices.size() &&
21477  "Unmatched number of shufflevectors and indices");
21478 
21479  auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21480  Type *EltTy = VecTy->getElementType();
21481 
21482  const DataLayout &DL = LI->getModule()->getDataLayout();
21483  Align Alignment = LI->getAlign();
21484 
21485  // Skip if we do not have NEON and skip illegal vector types. We can
21486  // "legalize" wide vector types into multiple interleaved accesses as long as
21487  // the vector types are divisible by 128.
21488  if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21489  return false;
21490 
21491  unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21492 
21493  // A pointer vector can not be the return type of the ldN intrinsics. Need to
21494  // load integer vectors first and then convert to pointer vectors.
21495  if (EltTy->isPointerTy())
21496  VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21497 
21498  IRBuilder<> Builder(LI);
21499 
21500  // The base address of the load.
21501  Value *BaseAddr = LI->getPointerOperand();
21502 
21503  if (NumLoads > 1) {
21504  // If we're going to generate more than one load, reset the sub-vector type
21505  // to something legal.
21506  VecTy = FixedVectorType::get(VecTy->getElementType(),
21507  VecTy->getNumElements() / NumLoads);
21508 
21509  // We will compute the pointer operand of each load from the original base
21510  // address using GEPs. Cast the base address to a pointer to the scalar
21511  // element type.
21512  BaseAddr = Builder.CreateBitCast(
21513  BaseAddr,
21514  VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
21515  }
21516 
21517  assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21518 
21519  auto createLoadIntrinsic = [&](Value *BaseAddr) {
21520  if (Subtarget->hasNEON()) {
21521  Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
21522  Type *Tys[] = {VecTy, Int8Ptr};
21523  static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21524  Intrinsic::arm_neon_vld3,
21525  Intrinsic::arm_neon_vld4};
21526  Function *VldnFunc =
21527  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21528 
21530  Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
21531  Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21532 
21533  return Builder.CreateCall(VldnFunc, Ops, "vldN");
21534  } else {
21535  assert((Factor == 2 || Factor == 4) &&
21536  "expected interleave factor of 2 or 4 for MVE");
21537  Intrinsic::ID LoadInts =
21538  Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21539  Type *VecEltTy =
21540  VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
21541  Type *Tys[] = {VecTy, VecEltTy};
21542  Function *VldnFunc =
21543  Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21544 
21546  Ops.push_back(Builder.CreateBitCast(BaseAddr, VecEltTy));
21547  return Builder.CreateCall(VldnFunc, Ops, "vldN");
21548  }
21549  };
21550 
21551  // Holds sub-vectors extracted from the load intrinsic return values. The
21552  // sub-vectors are associated with the shufflevector instructions they will
21553  // replace.
21555 
21556  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21557  // If we're generating more than one load, compute the base address of
21558  // subsequent loads as an offset from the previous.
21559  if (LoadCount > 0)
21560  BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21561  VecTy->getNumElements() * Factor);
21562 
21563  CallInst *VldN = createLoadIntrinsic(BaseAddr);
21564 
21565  // Replace uses of each shufflevector with the corresponding vector loaded
21566  // by ldN.
21567  for (unsigned i = 0; i < Shuffles.size(); i++) {
21568  ShuffleVectorInst *SV = Shuffles[i];
21569  unsigned Index = Indices[i];
21570 
21571  Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21572 
21573  // Convert the integer vector to pointer vector if the element is pointer.
21574  if (EltTy->isPointerTy())
21575  SubVec = Builder.CreateIntToPtr(
21576  SubVec,
21577  FixedVectorType::get(SV->getType()->getElementType(), VecTy));
21578 
21579  SubVecs[SV].push_back(SubVec);
21580  }
21581  }
21582 
21583  // Replace uses of the shufflevector instructions with the sub-vectors
21584  // returned by the load intrinsic. If a shufflevector instruction is
21585  // associated with more than one sub-vector, those sub-vectors will be
21586  // concatenated into a single wide vector.
21587  for (ShuffleVectorInst *SVI : Shuffles) {
21588  auto &SubVec = SubVecs[SVI];
21589  auto *WideVec =
21590  SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21591  SVI->replaceAllUsesWith(WideVec);
21592  }
21593 
21594  return true;
21595 }
21596 
21597 /// Lower an interleaved store into a vstN intrinsic.
21598 ///
21599 /// E.g. Lower an interleaved store (Factor = 3):
21600 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21601 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21602 /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21603 ///
21604 /// Into:
21605 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21606 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21607 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21608 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21609 ///
21610 /// Note that the new shufflevectors will be removed and we'll only generate one
21611 /// vst3 instruction in CodeGen.
21612 ///
21613 /// Example for a more general valid mask (Factor 3). Lower:
21614 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21615 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21616 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
21617 ///
21618 /// Into:
21619 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21620 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21621 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21622 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21624  ShuffleVectorInst *SVI,
21625  unsigned Factor) const {
21626  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21627  "Invalid interleave factor");
21628 
21629  auto *VecTy = cast<FixedVectorType>(SVI->getType());
21630  assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21631 
21632  unsigned LaneLen = VecTy->getNumElements() / Factor;
21633  Type *EltTy = VecTy->getElementType();
21634  auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21635 
21636  const DataLayout &DL = SI->getModule()->getDataLayout();
21637  Align Alignment = SI->getAlign();
21638 
21639  // Skip if we do not have NEON and skip illegal vector types. We can
21640  // "legalize" wide vector types into multiple interleaved accesses as long as
21641  // the vector types are divisible by 128.
21642  if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21643  return false;
21644 
21645  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21646 
21647  Value *Op0 = SVI->getOperand(0);
21648  Value *Op1 = SVI->getOperand(1);
21650 
21651  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21652  // vectors to integer vectors.
21653  if (EltTy->isPointerTy()) {
21654  Type *IntTy = DL.getIntPtrType(EltTy);
21655 
21656  // Convert to the corresponding integer vector.
21657  auto *IntVecTy =
21658  FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21659  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21660  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21661 
21662  SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21663  }
21664 
21665  // The base address of the store.
21666  Value *BaseAddr = SI->getPointerOperand();
21667 
21668  if (NumStores > 1) {
21669  // If we're going to generate more than one store, reset the lane length
21670  // and sub-vector type to something legal.
21671  LaneLen /= NumStores;
21672  SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21673 
21674  // We will compute the pointer operand of each store from the original base
21675  // address using GEPs. Cast the base address to a pointer to the scalar
21676  // element type.
21677  BaseAddr = Builder.CreateBitCast(
21678  BaseAddr,
21679  SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
21680  }
21681 
21682  assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21683 
21684  auto Mask = SVI->getShuffleMask();
21685 
21686  auto createStoreIntrinsic = [&](Value *BaseAddr,
21687  SmallVectorImpl<Value *> &Shuffles) {
21688  if (Subtarget->hasNEON()) {
21689  static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21690  Intrinsic::arm_neon_vst3,
21691  Intrinsic::arm_neon_vst4};
21692  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
21693  Type *Tys[] = {Int8Ptr, SubVecTy};
21694 
21695  Function *VstNFunc = Intrinsic::getDeclaration(
21696  SI->getModule(), StoreInts[Factor - 2], Tys);
21697 
21699  Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
21700  append_range(Ops, Shuffles);
21701  Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21702  Builder.CreateCall(VstNFunc, Ops);
21703  } else {
21704  assert((Factor == 2 || Factor == 4) &&
21705  "expected interleave factor of 2 or 4 for MVE");
21706  Intrinsic::ID StoreInts =
21707  Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21708  Type *EltPtrTy = SubVecTy->getElementType()->getPointerTo(
21709  SI->getPointerAddressSpace());
21710  Type *Tys[] = {EltPtrTy, SubVecTy};
21711  Function *VstNFunc =
21712  Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21713 
21715  Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
21716  append_range(Ops, Shuffles);
21717  for (unsigned F = 0; F < Factor; F++) {
21718  Ops.push_back(Builder.getInt32(F));
21719  Builder.CreateCall(VstNFunc, Ops);
21720  Ops.pop_back();
21721  }
21722  }
21723  };
21724 
21725  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21726  // If we generating more than one store, we compute the base address of
21727  // subsequent stores as an offset from the previous.
21728  if (StoreCount > 0)
21729  BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21730  BaseAddr, LaneLen * Factor);
21731 
21732  SmallVector<Value *, 4> Shuffles;
21733 
21734  // Split the shufflevector operands into sub vectors for the new vstN call.
21735  for (unsigned i = 0; i < Factor; i++) {
21736  unsigned IdxI = StoreCount * LaneLen * Factor + i;
21737  if (Mask[IdxI] >= 0) {
21738  Shuffles.push_back(Builder.CreateShuffleVector(
21739  Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21740  } else {
21741  unsigned StartMask = 0;
21742  for (unsigned j = 1; j < LaneLen; j++) {
21743  unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21744  if (Mask[IdxJ * Factor + IdxI] >= 0) {
21745  StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21746  break;
21747  }
21748  }
21749  // Note: If all elements in a chunk are undefs, StartMask=0!
21750  // Note: Filling undef gaps with random elements is ok, since
21751  // those elements were being written anyway (with undefs).
21752  // In the case of all undefs we're defaulting to using elems from 0
21753  // Note: StartMask cannot be negative, it's checked in
21754  // isReInterleaveMask
21755  Shuffles.push_back(Builder.CreateShuffleVector(
21756  Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21757  }
21758  }
21759 
21760  createStoreIntrinsic(BaseAddr, Shuffles);
21761  }
21762  return true;
21763 }
21764 
21771 };
21772 
/// Recursively classify \p Ty as an AAPCS-VFP Homogeneous Aggregate (HA).
/// An HA is a struct/array whose leaf elements all share one "base" type:
/// float, double, or a 64-/128-bit vector. \p Base threads the base type seen
/// so far across the recursion (callers start it at HA_UNKNOWN) and
/// \p Members accumulates the leaf-element count. Returns true only when the
/// type is homogeneous AND has 1-4 members, per the AAPCS VFP rules.
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
                                   uint64_t &Members) {
  if (auto *ST = dyn_cast<StructType>(Ty)) {
    // A struct is an HA iff every field is itself an HA with the same Base.
    for (unsigned i = 0; i < ST->getNumElements(); ++i) {
      uint64_t SubMembers = 0;
      if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
        return false;
      Members += SubMembers;
    }
  } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
    // An array contributes (members per element) * (array length).
    uint64_t SubMembers = 0;
    if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
      return false;
    Members += SubMembers * AT->getNumElements();
  } else if (Ty->isFloatTy()) {
    // Leaf: reject if a different base type was already committed to.
    if (Base != HA_UNKNOWN && Base != HA_FLOAT)
      return false;
    Members = 1;
    Base = HA_FLOAT;
  } else if (Ty->isDoubleTy()) {
    if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
      return false;
    Members = 1;
    Base = HA_DOUBLE;
  } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
    Members = 1;
    switch (Base) {
    case HA_FLOAT:
    case HA_DOUBLE:
      // Scalars and vectors never mix within one HA.
      return false;
    case HA_VECT64:
      return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
    case HA_VECT128:
      return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
    case HA_UNKNOWN:
      // First vector seen fixes the required vector width for the rest.
      switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
      case 64:
        Base = HA_VECT64;
        return true;
      case 128:
        Base = HA_VECT128;
        return true;
      default:
        return false;
      }
    }
  }

  // AAPCS caps an HA at four members. Types matching none of the cases above
  // (e.g. integers) fall through with Members still 0 and are rejected.
  return (Members > 0 && Members <= 4);
}
21823 
21824 /// Return the correct alignment for the current calling convention.
21826  Type *ArgTy, const DataLayout &DL) const {
21827  const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21828  if (!ArgTy->isVectorTy())
21829  return ABITypeAlign;
21830 
21831  // Avoid over-aligning vector parameters. It would require realigning the
21832  // stack and waste space for no real benefit.
21833  return std::min(ABITypeAlign, DL.getStackAlignment());
21834 }
21835 
21836 /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21837 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21838 /// passing according to AAPCS rules.
21840  Type *Ty, CallingConv::ID CallConv, bool isVarArg,
21841  const DataLayout &DL) const {
21842  if (getEffectiveCallingConv(CallConv, isVarArg) !=
21844  return false;
21845 
21847  uint64_t Members = 0;
21848  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
21849  LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
21850 
21851  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
21852  return IsHA || IsIntArray;
21853 }
21854 
21856  const Constant *PersonalityFn) const {
21857  // Platforms which do not use SjLj EH may return values in these registers
21858  // via the personality function.
21859  return Subtarget->useSjLjEH() ? Register() : ARM::R0;
21860 }
21861 
21863  const Constant *PersonalityFn) const {
21864  // Platforms which do not use SjLj EH may return values in these registers
21865  // via the personality function.
21866  return Subtarget->useSjLjEH() ? Register() : ARM::R1;
21867 }
21868 
21869 void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
21870  // Update IsSplitCSR in ARMFunctionInfo.
21871  ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
21872  AFI->setIsSplitCSR(true);
21873 }
21874 
/// For split-CSR functions (CXX_FAST_TLS), save callee-saved registers via
/// register copies rather than stack spills: copy each CSR into a fresh
/// virtual register at function entry, and copy it back right before the
/// terminator of every exit block.
void ARMTargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return; // Null list: this function has no CSRs handled via copies.

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  // The list from getCalleeSavedRegsViaCopy is terminated by register 0.
  for (const MCPhysReg *I = IStart; *I; ++I) {
    // Pick a register class for the virtual copy: core (GPR) or FP (DPR).
    const TargetRegisterClass *RC = nullptr;
    if (ARM::GPRRegClass.contains(*I))
      RC = &ARM::GPRRegClass;
    else if (ARM::DPRRegClass.contains(*I))
      RC = &ARM::DPRRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    Register NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction().hasFnAttribute(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    // The physical CSR must be live-in to the entry block for the copy to
    // read a defined value.
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
21915 
21919 }
21920 
21922  return Subtarget->hasMVEIntegerOps();
21923 }
21924 
21927  auto *VTy = dyn_cast<FixedVectorType>(Ty);
21928  if (!VTy)
21929  return false;
21930 
21931  auto *ScalarTy = VTy->getScalarType();
21932  unsigned NumElements = VTy->getNumElements();
21933 
21934  unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
21935  if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
21936  return false;
21937 
21938  // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
21939  if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
21940  return Subtarget->hasMVEFloatOps();
21941 
21943  return false;
21944 
21945  return Subtarget->hasMVEIntegerOps() &&
21946  (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
21947  ScalarTy->isIntegerTy(32));
21948 }
21949 
21952  ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
21953  Value *Accumulator) const {
21954 
21955  FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
21956 
21957  IRBuilder<> B(I);
21958 
21959  unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
21960 
21961  assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
21962 
21963  if (TyWidth > 128) {
21964  int Stride = Ty->getNumElements() / 2;
21965  auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
21966  auto SplitSeqVec = llvm::to_vector(SplitSeq);
21967  ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
21968  ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
21969 
21970  auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
21971  auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
21972  auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
21973  auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
21974  Value *LowerSplitAcc = nullptr;
21975  Value *UpperSplitAcc = nullptr;
21976 
21977  if (Accumulator) {
21978  LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
21979  UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
21980  }
21981 
21982  auto *LowerSplitInt = createComplexDeinterleavingIR(
21983  I, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
21984  auto *UpperSplitInt = createComplexDeinterleavingIR(
21985  I, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
21986 
21987  ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
21988  return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
21989  }
21990 
21991  auto *IntTy = Type::getInt32Ty(B.getContext());
21992 
21993  ConstantInt *ConstRotation = nullptr;
21994  if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
21995  ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
21996 
21997  if (Accumulator)
21998  return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
21999  {ConstRotation, Accumulator, InputB, InputA});
22000  return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22001  {ConstRotation, InputB, InputA});
22002  }
22003 
22004  if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22005  // 1 means the value is not halved.
22006  auto *ConstHalving = ConstantInt::get(IntTy, 1);
22007 
22009  ConstRotation = ConstantInt::get(IntTy, 0);
22010  else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22011  ConstRotation = ConstantInt::get(IntTy, 1);
22012 
22013  if (!ConstRotation)
22014  return nullptr; // Invalid rotation for arm_mve_vcaddq
22015 
22016  return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22017  {ConstHalving, ConstRotation, InputA, InputB});
22018  }
22019 
22020  return nullptr;
22021 }
llvm::ARMTargetLowering::isLegalT1ScaledAddressingMode
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
Definition: ARMISelLowering.cpp:19439
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::ARMTargetLowering::isZExtFree
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Definition: ARMISelLowering.cpp:19014
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:916
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:130
RCPair
std::pair< unsigned, const TargetRegisterClass * > RCPair
Definition: ARMISelLowering.cpp:20222
llvm::ARMISD::VADDLVs
@ VADDLVs
Definition: ARMISelLowering.h:238
OP_VTRNL
@ OP_VTRNL
Definition: ARMISelLowering.cpp:8324
llvm::SDNode::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Definition: SelectionDAGNodes.h:1627
llvm::ARMISD::VLD3DUP_UPD
@ VLD3DUP_UPD
Definition: ARMISelLowering.h:340
OP_VDUP0
@ OP_VDUP0
Definition: ARMISelLowering.cpp:8313
llvm::TargetLowering::SimplifyDemandedBitsForTargetNode
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
Definition: TargetLowering.cpp:3545
i
i
Definition: README.txt:29
ARMSubtarget.h
llvm::ISD::STRICT_FP_ROUND
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:464
PerformMULCombine
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:14057
llvm::ARMTargetLowering::LowerXConstraint
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
Definition: ARMISelLowering.cpp:20139
LowerPREFETCH
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:4262
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1447
LowerVectorINT_TO_FP
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:5943
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:247
llvm::ARMTargetLowering::shouldAlignPointerArgs
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Definition: ARMISelLowering.cpp:1923
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
PerformCSETCombine
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:14867
llvm::ARMSubtarget::useSjLjEH
bool useSjLjEH() const
Definition: ARMSubtarget.h:348
llvm::TargetLoweringBase::ShiftLegalizationStrategy::LowerToLibcall
@ LowerToLibcall
PerformADDECombine
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
Definition: ARMISelLowering.cpp:13374
llvm::TargetLoweringBase::MaxStoresPerMemsetOptSize
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3438
PerformLOADCombine
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:16361
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:36
llvm::MSP430ISD::RRC
@ RRC
Y = RRC X, rotate right via carry.
Definition: MSP430ISelLowering.h:36
llvm::ARMTargetLowering::getRegClassFor
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
Definition: ARMISelLowering.cpp:1899
ValueTypes.h
PerformSUBCombine
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
Definition: ARMISelLowering.cpp:13908
llvm::ARMISD::VMOVN
@ VMOVN
Definition: ARMISelLowering.h:213
isValidBaseUpdate
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
Definition: ARMISelLowering.cpp:15958
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::ShuffleVectorSDNode::getSplatIndex
int getSplatIndex() const
Definition: SelectionDAGNodes.h:1552
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1586
llvm::ARMSubtarget::hasBaseDSP
bool hasBaseDSP() const
Definition: ARMSubtarget.h:349
llvm::ISD::SETCCCARRY
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:744
BitsProperlyConcatenate
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
Definition: ARMISelLowering.cpp:14708
llvm::ARMISD::RRX
@ RRX
Definition: ARMISelLowering.h:107
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2398
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:475
llvm::RISCVAttrs::StackAlign
StackAlign
Definition: RISCVAttributes.h:37
llvm::isConstOrConstSplat
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
Definition: SelectionDAG.cpp:11031
llvm::ARMFunctionInfo
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
Definition: ARMMachineFunctionInfo.h:29
llvm::ARMTargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
Definition: ARMISelLowering.cpp:1881
llvm::ARMISD::QSUB16b
@ QSUB16b
Definition: ARMISelLowering.h:284
PerformExtractEltCombine
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:15396
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2309
GeneratePerfectShuffle
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Definition: ARMISelLowering.cpp:8401
getLdOpcode
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
Definition: ARMISelLowering.cpp:11178
llvm::ARMISD::VMLALVs
@ VMLALVs
Definition: ARMISelLowering.h:250
OP_VZIPL
@ OP_VZIPL
Definition: ARMISelLowering.cpp:8322
llvm::ARMII::TOF
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
llvm::ISD::VECTOR_SHUFFLE
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:586
llvm::ARMISD::SMLAL
@ SMLAL
Definition: ARMISelLowering.h:266
llvm::SDValue::dump
void dump() const
Definition: SelectionDAGNodes.h:1193
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:548
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4884
llvm::ARM_AM::isThumbImmShiftedVal
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
Definition: ARMAddressingModes.h:221
llvm::ARMBaseRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Definition: ARMBaseRegisterInfo.cpp:63
llvm::SelectionDAG::getCALLSEQ_START
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:1022
llvm::ISD::isSignedIntSetCC
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1467
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1458
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::ARMTargetLowering::insertSSPDeclarations
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: ARMISelLowering.cpp:21230
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1443
llvm::SDUse
Represents a use of a SDNode.
Definition: SelectionDAGNodes.h:284
llvm::MVT::v4f16
@ v4f16
Definition: MachineValueType.h:149
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1383
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:357
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
MachineInstr.h
llvm::ISD::UMULO
@ UMULO
Definition: ISDOpcodes.h:332
MathExtras.h
Merge
R600 Clause Merge
Definition: R600ClauseMergePass.cpp:70
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm::MachineConstantPool::getConstantPoolIndex
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
Definition: MachineFunction.cpp:1441
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::ARMISD::MEMSETLOOP
@ MEMSETLOOP
Definition: ARMISelLowering.h:317
llvm::ShuffleVectorInst::isIdentityMask
static bool isIdentityMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
Definition: Instructions.cpp:2320
llvm::ARMISD::BCC_i64
@ BCC_i64
Definition: ARMISelLowering.h:103
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::ARMTargetLowering::getPreIndexedAddressParts
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
Definition: ARMISelLowering.cpp:19715
LowerSaturatingConditional
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:5288
isVShiftRImm
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
Definition: ARMISelLowering.cpp:6567
llvm::TargetOptions::GuaranteedTailCallOpt
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
Definition: TargetOptions.h:221
PerformAddeSubeCombine
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:13014
llvm::RetCC_ARM_APCS
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ARMISD::VQSHLsuIMM
@ VQSHLsuIMM
Definition: ARMISelLowering.h:169
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
llvm::ARMConstantPoolValue
ARMConstantPoolValue - ARM specific constantpool value.
Definition: ARMConstantPoolValue.h:61
llvm::MVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: MachineValueType.h:1140
llvm::ARM_AM::getFP32FP16Imm
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
Definition: ARMAddressingModes.h:679
llvm::ARMISD::VLD2_UPD
@ VLD2_UPD
Definition: ARMISelLowering.h:332
llvm::MachineFrameInfo::hasVAStart
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
Definition: MachineFrameInfo.h:630
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:986
ARMPerfectShuffle.h
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:386
llvm::ARMTargetLowering::isComplexDeinterleavingOperationSupported
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
Definition: ARMISelLowering.cpp:21925
llvm::ARMISD::SMLALTT
@ SMLALTT
Definition: ARMISelLowering.h:271
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1106
OP_VREV
@ OP_VREV
Definition: ARMISelLowering.cpp:8312
ReplaceCMP_SWAP_64Results
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:10329
llvm::MachineJumpTableInfo::createJumpTableIndex
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
Definition: MachineFunction.cpp:1272
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:34
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:197
llvm::ARMSubtarget::hasARMOps
bool hasARMOps() const
Definition: ARMSubtarget.h:326
llvm::StructType::get
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:408
llvm::ARM::PredBlockMask::TT
@ TT
llvm::HexPrintStyle::Upper
@ Upper
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:833
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::ARMSubtarget::isTargetAndroid
bool isTargetAndroid() const
Definition: ARMSubtarget.h:406
llvm::ARMISD::VMOVrh
@ VMOVrh
Definition: ARMISelLowering.h:196
llvm::ARMISD::CMP
@ CMP
Definition: ARMISelLowering.h:87
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: MachineValueType.h:376
llvm::TargetMachine::useEmulatedTLS
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Definition: TargetMachine.cpp:146
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::ARMTargetLowering::isDesirableToCommuteXorWithShift
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
Definition: ARMISelLowering.cpp:13687
llvm::ARM_AM::getShiftOpcForNode
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
Definition: ARMSelectionDAGInfo.h:23
llvm::ARMISD::SMLALBB
@ SMLALBB
Definition: ARMISelLowering.h:268
llvm::ISD::SETGT
@ SETGT
Definition: ISDOpcodes.h:1455
llvm::CC_ARM_Win32_CFGuard_Check
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:885
LowerCTTZ
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:6440
llvm::VMOVModImm
@ VMOVModImm
Definition: ARMISelLowering.h:989
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1459
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:157
llvm::ARMISD::CMPZ
@ CMPZ
Definition: ARMISelLowering.h:89
llvm::TargetLowering::getSingleConstraintMatchWeight
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Definition: TargetLowering.cpp:5604
llvm::ARMISD::VADDLVAu
@ VADDLVAu
Definition: ARMISelLowering.h:241
llvm::DataLayout::getPreferredAlign
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition: DataLayout.cpp:994
llvm::MachineFrameInfo::setReturnAddressIsTaken
void setReturnAddressIsTaken(bool s)
Definition: MachineFrameInfo.h:378
llvm::TargetLowering::ConstraintType
ConstraintType
Definition: TargetLowering.h:4620
llvm::ARMTargetLowering::preferIncOfAddToSubOfNot
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
Definition: ARMISelLowering.cpp:13728
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:990
llvm::EVT::getVectorElementCount
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:322
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::KnownBits::resetAll
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
llvm::DiagnosticInfoUnsupported
Diagnostic information for unsupported feature in backend.
Definition: DiagnosticInfo.h:1008
llvm::ARMISD::CALL_NOLINK
@ CALL_NOLINK
Definition: ARMISelLowering.h:71
llvm::SelectionDAG::addNoMergeSiteInfo
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
Definition: SelectionDAG.h:2274
llvm::ARMCP::TPOFF
@ TPOFF
Global Offset Table, Thread Pointer Offset.
Definition: ARMConstantPoolValue.h:51
llvm::ConstantSDNode::getAPIntValue
const APInt & getAPIntValue() const
Definition: SelectionDAGNodes.h:1600
llvm::TargetLowering::CallLoweringInfo::setChain
CallLoweringInfo & setChain(SDValue InChain)
Definition: TargetLowering.h:4243
IntrinsicInst.h
LowerEXTRACT_VECTOR_ELT_i1
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:9027
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:249
llvm::ARMISD::VLD3LN_UPD
@ VLD3LN_UPD
Definition: ARMISelLowering.h:336
llvm::ShuffleVectorInst::getShuffleMask
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition: Instructions.cpp:2225
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1414
llvm::TargetOptions
Definition: TargetOptions.h:124
AtomicOrdering.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:168
PerformORCombineToSMULWBT
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:14297
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:942
llvm::ISD::AssertSext
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:769
MCInstrDesc.h
llvm::ARMSubtarget
Definition: ARMSubtarget.h:47
llvm::EVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:348
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51
llvm::ISD::ATOMIC_LOAD_UMAX
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1197
llvm::TargetLowering::buildLegalVectorShuffle
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
Definition: TargetLowering.cpp:3572
llvm::HexagonISD::JT
@ JT
Definition: HexagonISelLowering.h:52
AddCombineBUILD_VECTORToVPADDL
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:12557
PerformVCMPCombine
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:15273
T
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:159
llvm::isOneConstant
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Definition: SelectionDAG.cpp:10932
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::Function
Definition: Function.h:59
LowerEXTRACT_SUBVECTOR
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:9164
llvm::SDNode::use_size
size_t use_size() const
Return the number of uses of this node.
Definition: SelectionDAGNodes.h:722
getFunction
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:542
llvm::TargetLowering::TargetLoweringOpt::LegalOps
bool LegalOps
Definition: TargetLowering.h:3657
StringRef.h
llvm::ARMBaseRegisterInfo::getThisReturnPreservedMask
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
Definition: ARMBaseRegisterInfo.cpp:175
llvm::ISD::PRE_DEC
@ PRE_DEC
Definition: ISDOpcodes.h:1383
LowerSDIV_v4i8
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:9616
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:700
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:243
isVUZPMask
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition: ARMISelLowering.cpp:7343
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:324
llvm::ARMTargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: ARMISelLowering.cpp:11824
llvm::ISD::DYNAMIC_STACKALLOC
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:975
LowerPredicateLoad
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:10045
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:708
getT2IndexedAddressParts
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:19633
llvm::ARMISD::WLS
@ WLS
Definition: ARMISelLowering.h:136
llvm::SDNode::isUndef
bool isUndef() const
Return true if the type of the node type undefined.
Definition: SelectionDAGNodes.h:667
isVEXTMask
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
Definition: ARMISelLowering.cpp:7208
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1884
MatchingStackOffset
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
Definition: ARMISelLowering.cpp:2934
llvm::MachinePointerInfo::getConstantPool
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition: MachineOperand.cpp:1043
llvm::CC_ARM_AAPCS_VFP
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:432
llvm::BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
Definition: SelectionDAG.cpp:11923
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
OP_COPY
@ OP_COPY
Definition: ARMISelLowering.cpp:8311
bitcastf32Toi32
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:5587
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
isVZIP_v_undef_Mask
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
Definition: ARMISelLowering.cpp:7447
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::convertAddSubFlagsOpcode
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
Definition: ARMBaseInstrInfo.cpp:2474
llvm::ComplexDeinterleavingRotation::Rotation_90
@ Rotation_90
llvm::ARMTargetLowering::ExpandInlineAsm
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
Definition: ARMISelLowering.cpp:20108
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:149
llvm::ARMSubtarget::isTargetWatchOS
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:363
llvm::Attribute::get
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:91
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:341
llvm::ARMISD::VMOVIMM
@ VMOVIMM
Definition: ARMISelLowering.h:189
llvm::TargetLoweringBase::MaxStoresPerMemset
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
Definition: TargetLowering.h:3436
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:46
llvm::ISD::MLOAD
@ MLOAD
Definition: ISDOpcodes.h:1211
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1484
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:31
isPowerOf2Constant
static const APInt * isPowerOf2Constant(SDValue V)
Definition: ARMISelLowering.cpp:17855
op
#define op(i)
High
uint64_t High
Definition: NVVMIntrRange.cpp:61
llvm::IntrinsicLowering::LowerToByteSwap
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
Definition: IntrinsicLowering.cpp:454
llvm::ARMCC::GE
@ GE
Definition: ARMBaseInfo.h:41
llvm::SelectionDAG::getFrameIndex
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
Definition: SelectionDAG.cpp:1765
llvm::ARMISD::PRELOAD
@ PRELOAD
Definition: ARMISelLowering.h:131
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
TryCombineBaseUpdate
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:15609
Statistic.h
llvm::ISD::FP_TO_UINT_SAT
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:839
Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition: AMDGPUAliasAnalysis.cpp:31
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1454
llvm::ARMISD::DYN_ALLOC
@ DYN_ALLOC
Definition: ARMISelLowering.h:127
llvm::KnownBits::isUnknown
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
InlineAsm.h
llvm::ISD::STACKRESTORE
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1056
llvm::ARMTargetLowering::lowerInterleavedLoad
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
Definition: ARMISelLowering.cpp:21470
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9508
llvm::MCInstrDesc::getSchedClass
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:596
llvm::ARM_MB::SY
@ SY
Definition: ARMBaseInfo.h:74
llvm::ARMSubtarget::hasFPARMv8Base
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:335
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:469
llvm::ARMTargetLowering::getConstraintType
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
Definition: ARMISelLowering.cpp:20162
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:67
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:2264
emitPostLd
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
Definition: ARMISelLowering.cpp:11216
llvm::ARMII::MO_COFFSTUB
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
llvm::ARMTargetLowering::PerformCMOVCombine
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Definition: ARMISelLowering.cpp:18164
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1516
llvm::ARMTargetLowering::getJumpTableEncoding
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
Definition: ARMISelLowering.cpp:3483
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:54
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::ARMISD::VIDUP
@ VIDUP
Definition: ARMISelLowering.h:225
llvm::ARMFunctionInfo::isThumb1OnlyFunction
bool isThumb1OnlyFunction() const
Definition: ARMMachineFunctionInfo.h:170
args
nvptx lower args
Definition: NVPTXLowerArgs.cpp:146
PerformMinMaxCombine
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
Definition: ARMISelLowering.cpp:17749
WinDBZCheckDenominator
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
Definition: ARMISelLowering.cpp:10010
llvm::CallingConv::PreserveMost
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
ErrorHandling.h
llvm::isBitwiseNot
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
Definition: SelectionDAG.cpp:11021
isSHL16
static bool isSHL16(const SDValue &Op)
Definition: ARMISelLowering.cpp:1992
llvm::Sched::ILP
@ ILP
Definition: TargetLowering.h:103
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::IRBuilder<>
llvm::ARMFunctionInfo::createPICLabelUId
unsigned createPICLabelUId()
Definition: ARMMachineFunctionInfo.h:236
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1672
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::ARMISD::VLD1x3_UPD
@ VLD1x3_UPD
Definition: ARMISelLowering.h:343
llvm::APInt::zextOrTrunc
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:994
llvm::ARMISD::VLD2DUP_UPD
@ VLD2DUP_UPD
Definition: ARMISelLowering.h:339
ARMMachineFunctionInfo.h
OP_VZIPR
@ OP_VZIPR
Definition: ARMISelLowering.cpp:8323
llvm::ARMTargetLowering::lowerInterleavedStore
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
Definition: ARMISelLowering.cpp:21623
llvm::GlobalAlias
Definition: GlobalAlias.h:28
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1355
llvm::MachineFunction::getFunctionNumber
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
Definition: MachineFunction.h:647
FindBFIToCombineWith
static SDValue FindBFIToCombineWith(SDNode *N)
Definition: ARMISelLowering.cpp:14714
llvm::ARMTargetLowering::SimplifyDemandedBitsForTargetNode
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
Definition: ARMISelLowering.cpp:20066
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:919
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1378
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::ISD::USUBSAT
@ USUBSAT
Definition: ISDOpcodes.h:350
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:463
LowerMUL
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:9541
isVUZP_v_undef_Mask
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
Definition: ARMISelLowering.cpp:7373
llvm::TargetLoweringBase::getLibcallName
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
Definition: TargetLowering.h:3177
llvm::TargetLoweringBase::findRepresentativeClass
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
Definition: TargetLoweringBase.cpp:1258
LowerVECTOR_SHUFFLEUsingMovs
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:8599
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::ARMSubtarget::useFPVFMx
bool useFPVFMx() const
Definition: ARMSubtarget.h:343
MachineBasicBlock.h
llvm::ConstantPool
Definition: ConstantPools.h:43
llvm::TargetLowering::SimplifyDemandedBits
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Definition: TargetLowering.cpp:1061
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:819
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3939
llvm::ARMTargetLowering::isLegalAddressingMode
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Definition: ARMISelLowering.cpp:19456
llvm::MCInstrDesc::operands
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
llvm::SelectionDAG::ReplaceAllUsesWith
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
Definition: SelectionDAG.cpp:10380
LowerPredicateStore
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:10102
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:210
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:127
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138
llvm::ARMISD::LE
@ LE
Definition: ARMISelLowering.h:139
llvm::ARMISD::VBICIMM
@ VBICIMM
Definition: ARMISelLowering.h:303
llvm::ARMISD::VSHRsIMM
@ VSHRsIMM
Definition: ARMISelLowering.h:158
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2348
llvm::TargetLowering::CallLoweringInfo::setCallee
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
Definition: TargetLowering.h:4262
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:282
llvm::MemOp
Definition: TargetLowering.h:112
llvm::ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
Definition: ARMISelLowering.cpp:21358
llvm::ARMISD::LDRD
@ LDRD
Definition: ARMISelLowering.h:359
llvm::ARMSubtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:359
getVCMPCondCode
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
Definition: ARMISelLowering.cpp:14500
llvm::CCValAssign::Indirect
@ Indirect
Definition: CallingConvLower.h:50
llvm::isShiftedMask_32
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:280
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:745
R4
#define R4(n)
OP_VDUP3
@ OP_VDUP3
Definition: ARMISelLowering.cpp:8316
APInt.h
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:236
llvm::ARMSubtarget::isRWPI
bool isRWPI() const
Definition: ARMSubtarget.cpp:347
llvm::ARMTargetLowering::alignLoopsWithOptSize
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
Definition: ARMISelLowering.cpp:21403
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:75
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::ARMTargetLowering::shouldExpandAtomicLoadInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
Definition: ARMISelLowering.cpp:21155
llvm::TargetLowering::isPositionIndependent
bool isPositionIndependent() const
Definition: TargetLowering.cpp:46
llvm::TargetLowering::C_Memory
@ C_Memory
Definition: TargetLowering.h:4623
llvm::ARMTargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
Definition: ARMISelLowering.cpp:2104
llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:651
ReplaceREADCYCLECOUNTER
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:10288
isSRA16
static bool isSRA16(const SDValue &Op)
Definition: ARMISelLowering.cpp:1984
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:315
Shift
bool Shift
Definition: README.txt:468
llvm::ARMTargetLowering::shouldFoldConstantShiftPairToMask
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
Definition: ARMISelLowering.cpp:13711
OP_VUZPL
@ OP_VUZPL
Definition: ARMISelLowering.cpp:8320
SelectionDAGAddressAnalysis.h
PerformORCombine_i1
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:14514
llvm::ARMII::MO_DLLIMPORT
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1449
PerformBFICombine
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:14745
MachineJumpTableInfo.h
llvm::CCState::AnalyzeFormalArguments
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition: CallingConvLower.cpp:82
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::RTLIB::Libcall
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Definition: RuntimeLibcalls.h:30
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
FPCCToARMCC
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
Definition: ARMISelLowering.cpp:2028
llvm::BranchProbability::getZero
static BranchProbability getZero()
Definition: BranchProbability.h:49
llvm::ARMTargetLowering
Definition: ARMISelLowering.h:390
llvm::ARMSubtarget::isTargetDriverKit
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:365
llvm::ARMSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:489
Module.h
isVMOVNMask
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
Definition: ARMISelLowering.cpp:7538
isVMOVModifiedImm
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
Definition: ARMISelLowering.cpp:6920
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
TryDistrubutionADDVecReduce
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:13412
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:749
ARMCallingConv.h
llvm::AttributeList
Definition: Attributes.h:432
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition: SelectionDAG.cpp:1461
llvm::ARMSubtarget::getTargetLowering
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:266
LowerCONCAT_VECTORS
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:9139
llvm::tgtok::Bits
@ Bits
Definition: TGLexer.h:50
TargetInstrInfo.h
llvm::ARMISD::VQSHRNsuIMM
@ VQSHRNsuIMM
Definition: ARMISelLowering.h:172
llvm::ARMTargetLowering::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
Definition: ARMISelLowering.cpp:20806
llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1484
expandf64Toi32
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
Definition: ARMISelLowering.cpp:5599
llvm::ARM_AM::getAM2Offset
unsigned getAM2Offset(unsigned AM2Opc)
Definition: ARMAddressingModes.h:406
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:8168
llvm::ARMISD::CMPFPw0
@ CMPFPw0
Definition: ARMISelLowering.h:92
promoteToConstantPool
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
Definition: ARMISelLowering.cpp:3780
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
llvm::ARMISD::VDUP
@ VDUP
Definition: ARMISelLowering.h:200
BaseUpdateTarget::isIntrinsic
bool isIntrinsic
Definition: ARMISelLowering.cpp:15595
llvm::ARMISD::VST1_UPD
@ VST1_UPD
Definition: ARMISelLowering.h:347
llvm::Use::getOperandNo
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:889
llvm::ARMSubtarget::getInstrInfo
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:262
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
llvm::MaskedStoreSDNode
This class is used to represent an MSTORE node.
Definition: SelectionDAGNodes.h:2687
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
isSignExtended
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
isSignExtended - Check if a node is a vector value that is sign-extended or a constant BUILD_VECTOR w...
Definition: ARMISelLowering.cpp:9383
llvm::ARMISD::PIC_ADD
@ PIC_ADD
Definition: ARMISelLowering.h:81
AddCombineTo64bitMLAL
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:12738
llvm::ARMISD::FMSTAT
@ FMSTAT
Definition: ARMISelLowering.h:95
llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
llvm::CallBase::isMustTailCall
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: Instructions.cpp:309
PerformSubCSINCCombine
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:13888
FMAInstKind::Accumulator
@ Accumulator
llvm::TargetLowering::CallLoweringInfo::CB
const CallBase * CB
Definition: TargetLowering.h:4225
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:128
SelectPairHalf
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
Definition: ARMISelLowering.cpp:7251
llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition: SelectionDAGNodes.h:830
llvm::ARMISD::MEMCPY
@ MEMCPY
Definition: ARMISelLowering.h:310
llvm::CCValAssign::BCvt
@ BCvt
Definition: CallingConvLower.h:44
llvm::TargetLoweringBase::setMinFunctionAlignment
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
Definition: TargetLowering.h:2519
llvm::ARMISD::BFI
@ BFI
Definition: ARMISelLowering.h:298
llvm::TargetLowering::LowerCallTo
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
Definition: SelectionDAGBuilder.cpp:9869
llvm::SelectionDAG::getNodeIfExists
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
Definition: SelectionDAG.cpp:10092
llvm::FloatABI::Hard
@ Hard
Definition: TargetOptions.h:31
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
llvm::ISD::VECREDUCE_FMAX
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1279
llvm::ARMISD::USAT
@ USAT
Definition: ARMISelLowering.h:101
llvm::ARMTargetLowering::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
Definition: ARMISelLowering.cpp:1937
IntrinsicLowering.h
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1275
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ARMVCC::Then
@ Then
Definition: ARMBaseInfo.h:91
llvm::ARMISD::VST4LN_UPD
@ VST4LN_UPD
Definition: ARMISelLowering.h:353
PerformVECTOR_SHUFFLECombine
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
Definition: ARMISelLowering.cpp:15538
llvm::TargetLowering::CallLoweringInfo::setInRegister
CallLoweringInfo & setInRegister(bool Value=true)
Definition: TargetLowering.h:4298
llvm::tgtok::FalseVal
@ FalseVal
Definition: TGLexer.h:62
llvm::ARMISD::VZIP
@ VZIP
Definition: ARMISelLowering.h:208
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:769
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::ARMISD::WIN__CHKSTK
@ WIN__CHKSTK
Definition: ARMISelLowering.h:133
llvm::ARMISD::SSAT
@ SSAT
Definition: ARMISelLowering.h:100
llvm::ARM_AM::ShiftOpc
ShiftOpc
Definition: ARMAddressingModes.h:27
llvm::ConstantSDNode::isZero
bool isZero() const
Definition: SelectionDAGNodes.h:1610
llvm::TargetLoweringBase::setLibcallCallingConv
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
Definition: TargetLowering.h:3194
llvm::MachineFrameInfo::setAdjustsStack
void setAdjustsStack(bool V)
Definition: MachineFrameInfo.h:610
BaseUpdateUser::Inc
SDValue Inc
Pointer increment operand.
Definition: ARMISelLowering.cpp:15604
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:899
llvm::codeview::EncodedFramePtrReg::StackPtr
@ StackPtr
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::ARMISD::WIN__DBZCHK
@ WIN__DBZCHK
Definition: ARMISelLowering.h:134
getZeroVector
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
Definition: ARMISelLowering.cpp:6283
llvm::ARMISD::VTBL1
@ VTBL1
Definition: ARMISelLowering.h:211
llvm::ARMTargetLowering::shouldConvertConstantLoadToIntImm
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Definition: ARMISelLowering.cpp:21037
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
STLExtras.h
llvm::ARMTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: ARMISelLowering.cpp:10411
llvm::ARMISD::ASRL
@ ASRL
Definition: ARMISelLowering.h:83
llvm::ISD::VAEND
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1085
llvm::ARMISD::VQRSHRNsIMM
@ VQRSHRNsIMM
Definition: ARMISelLowering.h:175
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1414
llvm::ARMTargetLowering::LowerAsmOperandForConstraint
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
Definition: ARMISelLowering.cpp:20304
llvm::APInt::lshr
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:839
llvm::ARMTargetLowering::shouldExpandAtomicStoreInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
Definition: ARMISelLowering.cpp:21133
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2336
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::ARMSubtarget::isTargetLinux
bool isTargetLinux() const
Definition: ARMSubtarget.h:366
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition: SelectionDAG.cpp:1446
llvm::ARMISD::SERET_FLAG
@ SERET_FLAG
Definition: ARMISelLowering.h:78
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:264
llvm::CodeModel::Kernel
@ Kernel
Definition: CodeGen.h:31
llvm::ARMISD::SMLSLD
@ SMLSLD
Definition: ARMISelLowering.h:274
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
llvm::ARMSubtarget::supportsTailCall
bool supportsTailCall() const
Definition: ARMSubtarget.h:455
llvm::ARMTargetLowering::targetShrinkDemandedConstant
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
Definition: ARMISelLowering.cpp:19984
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
getDivRemLibcall
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
Definition: ARMISelLowering.cpp:20469
llvm::ISD::SETOEQ
@ SETOEQ
Definition: ISDOpcodes.h:1437
llvm::ARMISD::NodeType
NodeType
Definition: ARMISelLowering.h:56
llvm::ARMISD::VTST
@ VTST
Definition: ARMISelLowering.h:150
llvm::ARMTargetLowering::finalizeLowering
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
Definition: ARMISelLowering.cpp:21916
PerformVMULCombine
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
Definition: ARMISelLowering.cpp:13961
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:280
llvm::ARMISD::VSHRuIMM
@ VSHRuIMM
Definition: ARMISelLowering.h:159
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:190
genTPLoopBody
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
Definition: ARMISelLowering.cpp:11721
llvm::CallBase::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1536
MVEMaxSupportedInterleaveFactor
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
getMVEIndexedAddressParts
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:19658
PerformBITCASTCombine
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:18357
IsVUZPShuffleNode
static bool IsVUZPShuffleNode(SDNode *N)
Definition: ARMISelLowering.cpp:12464
SelectionDAG.h
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:220
llvm::InstrItineraryData::getOperandCycle
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Definition: MCInstrItineraries.h:167
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:470
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::ARMISD::VMVNIMM
@ VMVNIMM
Definition: ARMISelLowering.h:190
llvm::ARMTargetLowering::getPostIndexedAddressParts
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Definition: ARMISelLowering.cpp:19775
llvm::ARMTargetLowering::emitStoreConditional
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
Definition: ARMISelLowering.cpp:21366
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1903
llvm::ARMCP::SECREL
@ SECREL
Thread Pointer Offset.
Definition: ARMConstantPoolValue.h:52
llvm::ARMTargetLowering::isComplexDeinterleavingSupported
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Definition: ARMISelLowering.cpp:21921
llvm::ARMISD::PREDICATE_CAST
@ PREDICATE_CAST
Definition: ARMISelLowering.h:141
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::ISD::STRICT_FP_TO_UINT
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:442
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
OP_VEXT1
@ OP_VEXT1
Definition: ARMISelLowering.cpp:8317
llvm::ARMISD::SMMLAR
@ SMMLAR
Definition: ARMISelLowering.h:276
llvm::GlobalValue::hasExternalWeakLinkage
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:524
llvm::ARMSubtarget::isTargetWatchABI
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:364
llvm::ARMTargetLowering::isCheapToSpeculateCtlz
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition: ARMISelLowering.cpp:21293
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1445
llvm::TypeSize::Fixed
static constexpr TypeSize Fixed(ScalarTy ExactSize)
Definition: TypeSize.h:331
PerformExtendCombine
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
Definition: ARMISelLowering.cpp:17654
llvm::ISD::SMAX
@ SMAX
Definition: ISDOpcodes.h:661
llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition: TargetLowering.h:2439
llvm::MachinePointerInfo::getJumpTable
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
Definition: MachineOperand.cpp:1054
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:481
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::ATOMIC_LOAD_OR
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1191
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:911
im
#define im(i)
llvm::RTLIB::getUINTTOFP
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition: TargetLoweringBase.cpp:452
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
llvm::ARMTargetLowering::isOffsetFoldingLegal
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition: ARMISelLowering.cpp:20770
llvm::ARMISD::MVESEXT
@ MVESEXT
Definition: ARMISelLowering.h:144
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:278
llvm::ARMTargetLowering::emitLoadLinked
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Definition: ARMISelLowering.cpp:21321
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1000
llvm::APInt::countPopulation
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1619
llvm::ISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
llvm::Triple::isWindowsMSVCEnvironment
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:593
MachineRegisterInfo.h
getSwappedCondition
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
Definition: X86InstrInfo.cpp:2795
KnownBits.h
llvm::ComplexDeinterleavingOperation
ComplexDeinterleavingOperation
Definition: ComplexDeinterleavingPass.h:36
llvm::ARMTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
Definition: ARMISelLowering.cpp:18896
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::ShuffleVectorSDNode
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
Definition: SelectionDAGNodes.h:1528
llvm::ARMTargetLowering::isDesirableToTransformToIntegerOp
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
Definition: ARMISelLowering.cpp:18891
LowerAtomicLoadStore
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:10278
llvm::ARMISD::UMLAL
@ UMLAL
Definition: ARMISelLowering.h:265
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2147
IsCMPZCSINC
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
Definition: ARMISelLowering.cpp:14821
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition: TargetLowering.h:2422
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::ARMTargetLowering::getTargetNodeName
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Definition: ARMISelLowering.cpp:1666
MachineValueType.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
OP_VDUP2
@ OP_VDUP2
Definition: ARMISelLowering.cpp:8315
llvm::ARMISD::WLSSETUP
@ WLSSETUP
Definition: ARMISelLowering.h:137
llvm::ARMFunctionInfo::setArgumentStackToRestore
void setArgumentStackToRestore(unsigned v)
Definition: ARMMachineFunctionInfo.h:226
llvm::ISD::ROTL
@ ROTL
Definition: ISDOpcodes.h:694
llvm::ISD::AVGFLOORU
@ AVGFLOORU
Definition: ISDOpcodes.h:644
llvm::ARMISD::BR2_JT
@ BR2_JT
Definition: ARMISelLowering.h:76
PerfectShuffleTable
static const unsigned PerfectShuffleTable[6561+1]
Definition: AArch64PerfectShuffle.h:25
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::ARMTargetLowering::PerformIntrinsicCombine
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
Definition: ARMISelLowering.cpp:17269
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ARMFunctionInfo::isCmseNSEntryFunction
bool isCmseNSEntryFunction() const
Definition: ARMMachineFunctionInfo.h:173
llvm::TargetLowering::TargetLoweringOpt::CombineTo
bool CombineTo(SDValue O, SDValue N)
Definition: TargetLowering.h:3668
llvm::CCState::addInRegsParamInfo
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
Definition: CallingConvLower.h:449
llvm::ARMISD::VST2LN_UPD
@ VST2LN_UPD
Definition: ARMISelLowering.h:351
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2511
PerformVECREDUCE_ADDCombine
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:16870
llvm::ISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
llvm::ISD::VECREDUCE_UMAX
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1291
llvm::MVT::v4bf16
@ v4bf16
Definition: MachineValueType.h:160
EnableConstpoolPromotion
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
isZeroVector
static bool isZeroVector(SDValue N)
Definition: ARMISelLowering.cpp:10172
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::ARMISD::QSUB8b
@ QSUB8b
Definition: ARMISelLowering.h:282
llvm::TargetLowering::CallLoweringInfo::setSExtResult
CallLoweringInfo & setSExtResult(bool Value=true)
Definition: TargetLowering.h:4328
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1525
llvm::get
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
Definition: PointerIntPair.h:234
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:772
llvm::TargetLoweringBase::preferredShiftLegalizationStrategy
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
Definition: TargetLowering.h:932
llvm::CallInfo
Definition: GVNHoist.cpp:217
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:929
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
CommandLine.h
LowerVecReduceF
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:10271
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1414
isVZIPMask
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition: ARMISelLowering.cpp:7414
llvm::BeforeLegalizeTypes
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:566
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1007
llvm::TargetLowering::TargetLoweringOpt::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3655
llvm::ARMISD::SMMLSR
@ SMMLSR
Definition: ARMISelLowering.h:277
llvm::ARMSubtarget::isTargetMuslAEABI
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:392
TargetLowering.h
ARMConstantPoolValue.h
llvm::TargetLoweringBase::AtomicExpansionKind::Expand
@ Expand
llvm::ARMII::MO_NO_FLAG
@ MO_NO_FLAG
Definition: ARMBaseInfo.h:246
llvm::ARMISD::VQRSHRNsuIMM
@ VQRSHRNsuIMM
Definition: ARMISelLowering.h:177
PerformSplittingMVETruncToNarrowingStores
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:16555
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:246
LowerVectorFP_TO_INT
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:5837
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:8118
llvm::ARMISD::VMULLs
@ VMULLs
Definition: ARMISelLowering.h:228
R2
#define R2(n)
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:362
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:644
llvm::ARM::isBitFieldInvertedMask
bool isBitFieldInvertedMask(unsigned v)
Definition: ARMISelLowering.cpp:20775
llvm::TargetLowering::CallLoweringInfo::IsVarArg
bool IsVarArg
Definition: TargetLowering.h:4203
HA_FLOAT
@ HA_FLOAT
Definition: ARMISelLowering.cpp:21767
llvm::ISD::EH_SJLJ_SETUP_DISPATCH
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:151
llvm::MVT::v8f16
@ v8f16
Definition: MachineValueType.h:150
PerformVMOVrhCombine
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:15044
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:682
attachMEMCPYScratchRegs
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
Definition: ARMISelLowering.cpp:12208
llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:441
PerformSignExtendInregCombine
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:15457
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:37
MCInstrItineraries.h
getDivRemArgList
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:20487
llvm::ARMTargetLowering::shouldConvertSplatType
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Definition: ARMISelLowering.cpp:19196
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1187
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
LowerFP_TO_INT_SAT
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:5906
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:728
llvm::ARMCC::EQ
@ EQ
Definition: ARMBaseInfo.h:31
llvm::ExternalSymbolSDNode
Definition: SelectionDAGNodes.h:2241
llvm::ARM_AM::getSOImmVal
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
Definition: ARMAddressingModes.h:149
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
GlobalValue.h
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1141
llvm::ARMCP::CPBlockAddress
@ CPBlockAddress
Definition: ARMConstantPoolValue.h:40
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1502
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
TargetMachine.h
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:702
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
ARMBaseInfo.h
llvm::APInt::lshrInPlace
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:846
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
SelectionDAGNodes.h
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
llvm::CallingConv::Swift
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1648
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition: SelectionDAGNodes.h:718
llvm::ARMISD::LOOP_DEC
@ LOOP_DEC
Definition: ARMISelLowering.h:138
llvm::ARMISD::VMLALVAs
@ VMLALVAs
Definition: ARMISelLowering.h:254
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
LowerCONCAT_VECTORS_i1
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:9066
LowerBuildVectorOfFPTrunc
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:7590
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
SkipExtensionForVMULL
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
Definition: ARMISelLowering.cpp:9468
llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition: MachineOperand.h:815
ConvertCarryFlagToBooleanCarry
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:5001
LowerVECTOR_SHUFFLE_i1
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:8547
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:674
LowerVectorExtend
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:9294
InlinePriorityMode::Cost
@ Cost
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1456
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:8220
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::ARMISD::VMOVSR
@ VMOVSR
Definition: ARMISelLowering.h:117
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:126
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:770
Operation
PowerPC Reduce CR logical Operation
Definition: PPCReduceCRLogicals.cpp:735
llvm::SelectionDAG::getObjectPtrOffset
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
Definition: SelectionDAG.h:1005
llvm::User
Definition: User.h:44
CombineVLDDUP
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
Definition: ARMISelLowering.cpp:16201
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:1057
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
llvm::ARMISD::t2CALL_BTI
@ t2CALL_BTI
Definition: ARMISelLowering.h:73
LowerBuildVectorOfFPExt
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:7643
llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:474
allUsersAreInFunction
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
Definition: ARMISelLowering.cpp:3764
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2598
LowerVECTOR_SHUFFLEv8i8
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:8458
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARMBaseRegisterInfo::getFrameRegister
Register getFrameRegister(const MachineFunction &MF) const override
Definition: ARMBaseRegisterInfo.cpp:485
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::ARMISD::VQRSHRNuIMM
@ VQRSHRNuIMM
Definition: ARMISelLowering.h:176
isVShiftLImm
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
Definition: ARMISelLowering.cpp:6553
findPointerConstIncrement
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
Definition: ARMISelLowering.cpp:15934
PerformADDCombineWithOperands
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
Definition: ARMISelLowering.cpp:13391
llvm::ARMTargetLowering::computeKnownBitsForTargetNode
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition: ARMISelLowering.cpp:19864
srl
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 srl
Definition: README.txt:1272
false
Function Alias Analysis false
Definition: AliasAnalysis.cpp:769
llvm::ARMISD::VADDLVAs
@ VADDLVAs
Definition: ARMISelLowering.h:240
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1540
LowerADDSUBSAT
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:5049
llvm::RTLIB::getFPROUND
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition: TargetLoweringBase.cpp:265
ReplaceLongIntrinsic
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:10530
Twine.h
llvm::EVT::is64BitVector
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:180
llvm::JumpTableSDNode
Definition: SelectionDAGNodes.h:1866
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3515
PerformVQMOVNCombine
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:17203
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
llvm::MVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: MachineValueType.h:1144
llvm::ARMSubtarget::useMulOps
bool useMulOps() const
Definition: ARMSubtarget.h:341
llvm::ARMTargetLowering::PerformMVEExtCombine
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
Definition: ARMISelLowering.cpp:18564
llvm::ARMSubtarget::useMovt
bool useMovt() const
Definition: ARMSubtarget.cpp:430
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::ARM_AM::no_shift
@ no_shift
Definition: ARMAddressingModes.h:28
llvm::SelectionDAG::MaskedValueIsZero
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition: SelectionDAG.cpp:2559
llvm::ARM_AM::getAM2Op
AddrOpc getAM2Op(unsigned AM2Opc)
Definition: ARMAddressingModes.h:409
llvm::SyncScope::SingleThread
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
llvm::ARMSubtarget::isTargetHardFloat
bool isTargetHardFloat() const
Definition: ARMSubtarget.cpp:327
PerformVMOVDRRCombine
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
Definition: ARMISelLowering.cpp:14981
llvm::ARMFunctionInfo::setPreservesR0
void setPreservesR0()
Definition: ARMMachineFunctionInfo.h:290
SI
@ SI
Definition: SIInstrInfo.cpp:7993
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:927
llvm::ARMFunctionInfo::getPromotedConstpoolIncrease
int getPromotedConstpoolIncrease() const
Definition: ARMMachineFunctionInfo.h:280
LowerShift
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:6582
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3933
LowerSTORE
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:10140
Domain
Domain
Definition: CorrelatedValuePropagation.cpp:696
LowerMLOAD
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:10178
BaseUpdateTarget::N
SDNode * N
Definition: ARMISelLowering.cpp:15594
llvm::SelectionDAG::InferPtrAlign
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
Definition: SelectionDAG.cpp:11585
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ARMISD::VQSHRNuIMM
@ VQSHRNuIMM
Definition: ARMISelLowering.h:171
Check
#define Check(C,...)
Definition: Lint.cpp:170
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
llvm::ARMTargetLowering::isMaskAndCmp0FoldingBeneficial
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
Definition: ARMISelLowering.cpp:21297
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:341
llvm::RetFastCC_ARM_APCS
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:234
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:763
llvm::APInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:463
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:258
isVTRN_v_undef_Mask
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
Definition: ARMISelLowering.cpp:7311
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
PerformSplittingToNarrowingStores
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:16462
llvm::MVT::v16i1
@ v16i1
Definition: MachineValueType.h:70
llvm::TargetLowering::CallLoweringInfo::setDebugLoc
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
Definition: TargetLowering.h:4238
llvm::ARMISD::SRL_FLAG
@ SRL_FLAG
Definition: ARMISelLowering.h:105
TargetOpcodes.h
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::MachinePointerInfo::getGOT
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
Definition: MachineOperand.cpp:1058
llvm::ARMISD::VREV16
@ VREV16
Definition: ARMISelLowering.h:207
PerformFAddVSelectCombine
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:16768
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:550
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2356
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::MVT::isVoid
@ isVoid
Definition: MachineValueType.h:284
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:382
llvm::ShuffleVectorSDNode::isSplat
bool isSplat() const
Definition: SelectionDAGNodes.h:1550
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
{ Convenience matchers for specific integer values.
Definition: MIPatternMatch.h:238
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:31
llvm::isUIntN
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:256
llvm::ARMTargetLowering::useSoftFloat
bool useSoftFloat() const override
Definition: ARMISelLowering.cpp:1614
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::MVT::v8i1
@ v8i1
Definition: MachineValueType.h:69
llvm::ARMISD::VADDLVps
@ VADDLVps
Definition: ARMISelLowering.h:242
isHomogeneousAggregate
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
Definition: ARMISelLowering.cpp:21773
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:806
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1033
llvm::MachineInstr::FrameSetup
@ FrameSetup
Definition: MachineInstr.h:84
llvm::ISD::ATOMIC_LOAD_AND
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1189
llvm::ARMISD::SMLALDX
@ SMLALDX
Definition: ARMISelLowering.h:273
ARMBaseInfo.h
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:391
llvm::ARMISD::VCMP
@ VCMP
Definition: ARMISelLowering.h:148
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
llvm::ARMISD::VMLALVAu
@ VMLALVAu
Definition: ARMISelLowering.h:255
llvm::ARMCC::HI
@ HI
Definition: ARMBaseInfo.h:39
llvm::Instruction
Definition: Instruction.h:41
llvm::ARMConstantPoolSymbol::Create
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
Definition: ARMConstantPoolValue.cpp:233
llvm::ARMISD::VMULLu
@ VMULLu
Definition: ARMISelLowering.h:229
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Concat
static constexpr int Concat[]
Definition: X86InterleavedAccess.cpp:239
llvm::TargetLowering::CallLoweringInfo::DoesNotReturn
bool DoesNotReturn
Definition: TargetLowering.h:4205
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:188
ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition: InstCombineVectorOps.cpp:763
llvm::AttributeList::hasFnAttr
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
Definition: Attributes.cpp:1479
getVShiftImm
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
Getvshiftimm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
Definition: ARMISelLowering.cpp:6532
llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:354
PerformVDIVCombine
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
Definition: ARMISelLowering.cpp:16820
LowerCTPOP
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:6496
llvm::ISD::ATOMIC_LOAD_UMIN
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1196
llvm::ShuffleVectorSDNode::getMask
ArrayRef< int > getMask() const
Definition: SelectionDAGNodes.h:1540
llvm::ARMISD::VMOVDRR
@ VMOVDRR
Definition: ARMISelLowering.h:116
llvm::CC_ARM_AAPCS
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ARMTargetLowering::preferredShiftLegalizationStrategy
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
Definition: ARMISelLowering.cpp:21313
PerformVCVTCombine
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
Definition: ARMISelLowering.cpp:16720
llvm::SDNode::use_empty
bool use_empty() const
Return true if there are no uses of this node.
Definition: SelectionDAGNodes.h:715
llvm::ARMISD::VLD1DUP
@ VLD1DUP
Definition: ARMISelLowering.h:325
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::Triple::getObjectFormat
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:381
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
HA_DOUBLE
@ HA_DOUBLE
Definition: ARMISelLowering.cpp:21768
PerformShuffleVMOVNCombine
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:15513
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1494
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ARMFunctionInfo::setVarArgsFrameIndex
void setVarArgsFrameIndex(int Index)
Definition: ARMMachineFunctionInfo.h:241
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:256
llvm::ARMSubtarget::isTargetAEABI
bool isTargetAEABI() const
Definition: ARMSubtarget.h:382
llvm::ARMISD::VQSHLuIMM
@ VQSHLuIMM
Definition: ARMISelLowering.h:168
llvm::ARM::RoundingBitsPos
const unsigned RoundingBitsPos
Definition: ARMISelLowering.h:377
llvm::TargetLoweringBase::getSDagStackGuard
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: TargetLoweringBase.cpp:1987
llvm::ARMBaseRegisterInfo::getCallPreservedMask
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Definition: ARMBaseRegisterInfo.cpp:131
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:279
APFloat.h
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:926
llvm::ISD::FP16_TO_FP
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:895
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:925
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:642
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1165
llvm::LSBaseSDNode::isUnindexed
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2339
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1789
llvm::ThreadPriority::Low
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
BitVector.h
llvm::pdb::PDB_LocType::TLS
@ TLS
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:887
ARMInterworking
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
PerformMinMaxToSatCombine
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:17708
PerformSHLSimplify
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:13757
llvm::DataLayout::getStackAlignment
Align getStackAlignment() const
Definition: DataLayout.h:271
llvm::ARMISD::SUBC
@ SUBC
Definition: ARMISelLowering.h:111
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7893
llvm::TargetLowering::C_Immediate
@ C_Immediate
Definition: TargetLowering.h:4625
isNEONTwoResultShuffleMask
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
Definition: ARMISelLowering.cpp:7479
isFloatingPointZero
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Definition: ARMISelLowering.cpp:4690
llvm::ARMBaseRegisterInfo::getSjLjDispatchPreservedMask
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
Definition: ARMBaseRegisterInfo.cpp:166
llvm::AArch64PACKey::IA
@ IA
Definition: AArch64BaseInfo.h:791
llvm::ARMISD::VQMOVNu
@ VQMOVNu
Definition: ARMISelLowering.h:217
DebugLoc.h
llvm::EVT::changeVectorElementType
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
llvm::ARMISD::VMINVs
@ VMINVs
Definition: ARMISelLowering.h:259
llvm::ISD::SETCC_INVALID
@ SETCC_INVALID
Definition: ISDOpcodes.h:1462
SmallPtrSet.h
Copies
SI Lower i1 Copies
Definition: SILowerI1Copies.cpp:397
llvm::SelectionDAG::SplitVectorOperand
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
Definition: SelectionDAG.h:2214
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:526
llvm::TargetLoweringBase::MaxStoresPerMemcpy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
Definition: TargetLowering.h:3451
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
isThumb
static bool isThumb(const MCSubtargetInfo &STI)
Definition: ARMAsmPrinter.cpp:468
llvm::BitVector
Definition: BitVector.h:75
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:132
llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
llvm::ARMTargetLowering::PerformCMOVToBFICombine
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
Definition: ARMISelLowering.cpp:17863
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1440
llvm::ISD::ABDS
@ ABDS
Definition: ISDOpcodes.h:655
llvm::ARMISD::VST3_UPD
@ VST3_UPD
Definition: ARMISelLowering.h:349
llvm::KnownBits::sext
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:171
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
PatternMatch.h
llvm::TargetLoweringBase::insertSSPDeclarations
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: TargetLoweringBase.cpp:1971
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1775
LowerWRITE_REGISTER
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:3419
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1591
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1174
llvm::ARMFunctionInfo::setArgRegsSaveSize
void setArgRegsSaveSize(unsigned s)
Definition: ARMMachineFunctionInfo.h:180
llvm::ISD::FSINCOS
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:959
llvm::MachineFrameInfo::isFixedObjectIndex
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
Definition: MachineFrameInfo.h:688
LowerTruncatei1
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:9218
llvm::CC_ARM_APCS_GHC
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:127
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1383
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:686
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1295
llvm::CallInst::isTailCall
bool isTailCall() const
Definition: Instructions.h:1677
llvm::MVT::v1i64
@ v1i64
Definition: MachineValueType.h:130
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::MachineInstr::definesRegister
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
Definition: MachineInstr.h:1428
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:156
isReverseMask
static bool isReverseMask(ArrayRef< int > M, EVT VT)
Definition: ARMISelLowering.cpp:7502
llvm::isVREVMask
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
Definition: ARMTargetTransformInfo.h:334
llvm::ARMISD::VLD1_UPD
@ VLD1_UPD
Definition: ARMISelLowering.h:331
PerformExtractFpToIntStores
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:16597
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2304
llvm::ARM_AM::getT2SOImmVal
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
Definition: ARMAddressingModes.h:307
llvm::MipsISD::ThreadPointer
@ ThreadPointer
Definition: MipsISelLowering.h:91
llvm::ARMTargetLowering::isReadOnly
bool isReadOnly(const GlobalValue *GV) const
Definition: ARMISelLowering.cpp:3876
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1153
llvm::AArch64_AM::getFP32Imm
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
Definition: AArch64AddressingModes.h:394
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
llvm::ARMConstantPoolMBB::Create
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
Definition: ARMConstantPoolValue.cpp:272
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:398
llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
llvm::MachineInstrBuilder::addExternalSymbol
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:184
llvm::isIntN
bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:261
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:272
isLowerSaturate
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
Definition: ARMISelLowering.cpp:5262
llvm::ARMISD::SMLALD
@ SMLALD
Definition: ARMISelLowering.h:272
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:100
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:190
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:120
llvm::TargetLowering::makeLibCall
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Definition: TargetLowering.cpp:144
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:328
llvm::MemSDNode::isVolatile
bool isVolatile() const
Definition: SelectionDAGNodes.h:1314
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:87
isLegalT2AddressImmediate
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:19313
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2063
PerformXORCombine
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:14640
llvm::ARMTargetLowering::getRegForInlineAsmConstraint
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Definition: ARMISelLowering.cpp:20224
PerformORCombine
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
Definition: ARMISelLowering.cpp:14540
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2392
Type.h
llvm::ARMISD::UQADD8b
@ UQADD8b
Definition: ARMISelLowering.h:285
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1441
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::Triple::isOSMSVCRT
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:626
llvm::VMVNModImm
@ VMVNModImm
Definition: ARMISelLowering.h:990
BranchProbability.h
isVMOVNTruncMask
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
Definition: ARMISelLowering.cpp:7562
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1446
llvm::EABI::EABI4
@ EABI4
llvm::Sched::RegPressure
@ RegPressure
Definition: TargetLowering.h:101
llvm::ISD::VECREDUCE_FMUL
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1277
Int
@ Int
Definition: TargetLibraryInfo.cpp:50
PerformSTORECombine
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
Definition: ARMISelLowering.cpp:16631
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:717
llvm::ARMISD::VADDLVu
@ VADDLVu
Definition: ARMISelLowering.h:239
llvm::ARMSubtarget::hasVFP3Base
bool hasVFP3Base() const
Definition: ARMSubtarget.h:333
llvm::ARMISD::EH_SJLJ_SETUP_DISPATCH
@ EH_SJLJ_SETUP_DISPATCH
Definition: ARMISelLowering.h:121
llvm::ARMTargetLowering::isDesirableToCommuteWithShift
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Definition: ARMISelLowering.cpp:13648
llvm::ARMISD::QADD8b
@ QADD8b
Definition: ARMISelLowering.h:281
llvm::ARMISD::WrapperPIC
@ WrapperPIC
Definition: ARMISelLowering.h:62
llvm::Pass::print
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
llvm::ARMTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: ARMISelLowering.cpp:18668
llvm::MachineInstrBuilder::cloneMemRefs
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Definition: MachineInstrBuilder.h:213
llvm::Triple::isOSVersionLT
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:463
llvm::ARMTargetLowering::isCheapToSpeculateCttz
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition: ARMISelLowering.cpp:21289
BaseUpdateUser
Definition: ARMISelLowering.cpp:15600
llvm::ComplexDeinterleavingRotation::Rotation_270
@ Rotation_270
llvm::ComplexDeinterleavingRotation
ComplexDeinterleavingRotation
Definition: ComplexDeinterleavingPass.h:44
ParseBFI
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
Definition: ARMISelLowering.cpp:14685
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: MachineValueType.h:366
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1383
uint64_t
Class for arbitrary precision integers APInt is a functional replacement for common case unsigned integer type like unsigned long or uint64_t
Definition: tmp.txt:1
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:154
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ARMISD::LSRL
@ LSRL
Definition: ARMISelLowering.h:84
llvm::ARMCC::NE
@ NE
Definition: ARMBaseInfo.h:32
llvm::ARMISD::CSINC
@ CSINC
Definition: ARMISelLowering.h:322
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:917
llvm::ARMISD::CALL_PRED
@ CALL_PRED
Definition: ARMISelLowering.h:70
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:222
llvm::TargetLoweringBase::PredictableSelectIsExpensive
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
Definition: TargetLowering.h:3492
llvm::to_vector
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1298
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::APInt::isSubsetOf
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:222
llvm::ARM::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Definition: ARMFastISel.cpp:3082
PerformFPExtendCombine
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:17697
llvm::ARMISD::VRSHRuIMM
@ VRSHRuIMM
Definition: ARMISelLowering.h:163
llvm::TargetRegisterInfo::regmaskSubsetEqual
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const
Return true if all bits that are set in mask mask0 are also set in mask1.
Definition: TargetRegisterInfo.cpp:492
llvm::ARMCC::VS
@ VS
Definition: ARMBaseInfo.h:37
llvm::ARMISD::WrapperJT
@ WrapperJT
Definition: ARMISelLowering.h:64
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:86
llvm::TargetLowering::TargetLoweringOpt
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Definition: TargetLowering.h:3654
llvm::ISD::BlockAddress
@ BlockAddress
Definition: ISDOpcodes.h:84
llvm::ARMCC::AL
@ AL
Definition: ARMBaseInfo.h:45
isBigEndian
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Definition: CombinerHelper.cpp:112
ARMSelectionDAGInfo.h
llvm::InlineAsm
Definition: InlineAsm.h:33
llvm::DataLayout::getPrefTypeAlign
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:857
llvm::TargetLowering::CallLoweringInfo::Outs
SmallVector< ISD::OutputArg, 32 > Outs
Definition: TargetLowering.h:4226
getPointerConstIncrement
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:15912
llvm::ISD::AVGCEILU
@ AVGCEILU
Definition: ISDOpcodes.h:649
PerformSplittingToWideningLoad
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:17573
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:133
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1450
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::ARMISD::VMLALVApu
@ VMLALVApu
Definition: ARMISelLowering.h:257
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:476
llvm::TargetLowering::CallLoweringInfo::setLibCallee
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
Definition: TargetLowering.h:4249
llvm::ARMFunctionInfo::setIsSplitCSR
void setIsSplitCSR(bool s)
Definition: ARMMachineFunctionInfo.h:247
llvm::MachineInstrBuilder::addFrameIndex
const MachineInstrBuilder & addFrameIndex(int Idx) const
Definition: MachineInstrBuilder.h:152
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2373
llvm::Type::getArrayElementType
Type * getArrayElementType() const
Definition: Type.h:397
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetLoweringBase::MaxStoresPerMemcpyOptSize
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3453
llvm::LegacyLegalizeActions::Lower
@ Lower
The operation itself must be expressed in terms of simpler actions on this target.
Definition: LegacyLegalizerInfo.h:58
llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition: DataLayout.h:239
VectorUtils.h
llvm::TargetLowering::softenSetCCOperands
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
Definition: TargetLowering.cpp:289
llvm::ARMISD::SUBS
@ SUBS
Definition: ARMISelLowering.h:98
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:660
llvm::ARMISD::VBSP
@ VBSP
Definition: ARMISelLowering.h:306
llvm::ARMTargetLowering::shouldExpandAtomicRMWInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition: ARMISelLowering.cpp:21172
llvm::ISD::WRITE_REGISTER
@ WRITE_REGISTER
Definition: ISDOpcodes.h:119
llvm::ARMISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
Definition: ARMISelLowering.h:119
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:955
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:640
llvm::TargetLowering::CW_SpecificReg
@ CW_SpecificReg
Definition: TargetLowering.h:4639
llvm::cl::opt< bool >
PerformCMPZCombine
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:14854
llvm::ISD::AVGCEILS
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:648
val
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
Definition: README.txt:15
llvm::APFloat
Definition: APFloat.h:744
llvm::SDNode::use_begin
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Definition: SelectionDAGNodes.h:800
llvm::MachineFunction::push_back
void push_back(MachineBasicBlock *MBB)
Definition: MachineFunction.h:887
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:270
llvm::ARMISD::VMLAVu
@ VMLAVu
Definition: ARMISelLowering.h:247
llvm::Triple::ELF
@ ELF
Definition: Triple.h:284
llvm::ARMISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
Definition: ARMISelLowering.h:120
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:277
llvm::SDValue::getNumOperands
unsigned getNumOperands() const
Definition: SelectionDAGNodes.h:1145
SMLAL
Should compile to use SMLAL(Signed Multiply Accumulate Long) which multiplies two signed 32-bit values to produce a 64-bit value
llvm::ARMISD::VADDLVAps
@ VADDLVAps
Definition: ARMISelLowering.h:244
llvm::ISD::SADDO
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:301
llvm::TargetLoweringBase::setMaxDivRemBitWidthSupported
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
Definition: TargetLowering.h:2552
llvm::TargetLowering::isConstTrueVal
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
Definition: TargetLowering.cpp:3648
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::ARMTargetLowering::isMulAddWithConstProfitable
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
Definition: ARMISelLowering.cpp:19552
PerformExtractEltToVMOVRRD
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:15335
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:89
createGPRPairNode
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
Definition: ARMISelLowering.cpp:10311
unsigned
Class for arbitrary precision integers APInt is a functional replacement for common case unsigned integer type like unsigned
Definition: tmp.txt:1
llvm::SelectionDAG::addCallSiteInfo
void addCallSiteInfo(const SDNode *Node, CallSiteInfoImpl &&CallInfo)
Set CallSiteInfo to be associated with Node.
Definition: SelectionDAG.h:2247
ARMBaseRegisterInfo.h
type
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference and DH registers in an instruction requiring REX prefix divb and mulb both produce results in AH If isel emits a CopyFromReg which gets turned into a movb and that can be allocated a r8b r15b To get around isel emits a CopyFromReg from AX and then right shift it down by and truncate it It s not pretty but it works We need some register allocation magic to make the hack go which would often require a callee saved register Callees usually need to keep this value live for most of their body so it doesn t add a significant burden on them We currently implement this in however this is suboptimal because it means that it would be quite awkward to implement the optimization for callers A better implementation would be to relax the LLVM IR rules for sret arguments to allow a function with an sret argument to have a non void return type
Definition: README-X86-64.txt:70
llvm::ARMISD::VEXT
@ VEXT
Definition: ARMISelLowering.h:204
MAKE_CASE
#define MAKE_CASE(V)
MCSchedule.h
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:920
llvm::ISD::VECREDUCE_FMIN
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1280
llvm::MachineBasicBlock::pred_end
pred_iterator pred_end()
Definition: MachineBasicBlock.h:359
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
PerformADDCombine
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
Definition: ARMISelLowering.cpp:13865
PerformInsertSubvectorCombine
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:15472
llvm::ARMISD::SMULWT
@ SMULWT
Definition: ARMISelLowering.h:264
llvm::ARMSubtarget::isThumb1Only
bool isThumb1Only() const
Definition: ARMSubtarget.h:420
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:35
isLTorLE
static bool isLTorLE(ISD::CondCode CC)
Definition: ARMISelLowering.cpp:5252
llvm::ARMCP::TLSGD
@ TLSGD
None.
Definition: ARMConstantPoolValue.h:48
llvm::IRSimilarity::Legal
@ Legal
Definition: IRSimilarityIdentifier.h:77
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:102
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:329
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
index
splat index
Definition: README_ALTIVEC.txt:181
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
BaseUpdateTarget::AddrOpIdx
unsigned AddrOpIdx
Definition: ARMISelLowering.cpp:15597
llvm::ARMISD::LSLL
@ LSLL
Definition: ARMISelLowering.h:85
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:131
llvm::ISD::ATOMIC_FENCE
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1157
llvm::MVT::v2f16
@ v2f16
Definition: MachineValueType.h:147
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:47
isConditionalZeroOrAllOnes
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:12352
llvm::ISD::ATOMIC_LOAD_MIN
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1194
llvm::ARMISD::VLD2DUP
@ VLD2DUP
Definition: ARMISelLowering.h:326
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:470
RuntimeLibcalls.h
llvm::FastCC_ARM_APCS
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
getVectorTyFromPredicateVector
static EVT getVectorTyFromPredicateVector(EVT VT)
Definition: ARMISelLowering.cpp:8497
llvm::ARMISD::MEMCPYLOOP
@ MEMCPYLOOP
Definition: ARMISelLowering.h:314
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:820
llvm::AArch64::rmMask
@ rmMask
Definition: AArch64ISelLowering.h:501
llvm::ARMFunctionInfo::getVarArgsFrameIndex
int getVarArgsFrameIndex() const
Definition: ARMMachineFunctionInfo.h:240
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1635
llvm::ARMTargetLowering::shouldSinkOperands
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
Definition: ARMISelLowering.cpp:19073
llvm::ARMISD::VUZP
@ VUZP
Definition: ARMISelLowering.h:209
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:237
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:942
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1359
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
llvm::TargetLowering::verifyReturnAddressArgumentIsConstant
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:6732
llvm::CallingConv::CXX_FAST_TLS
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1755
llvm::ISD::VECREDUCE_ADD
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1284
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:965
llvm::ISD::VECREDUCE_SMAX
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1289
LowerVSETCC
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:6707
llvm::CCState::getInRegsParamInfo
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
Definition: CallingConvLower.h:438
llvm::ARMISD::CMPFPEw0
@ CMPFPEw0
Definition: ARMISelLowering.h:93
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
PerformADDVecReduce
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:13557
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
GPRArgRegs
static const MCPhysReg GPRArgRegs[]
Definition: ARMISelLowering.cpp:155
llvm::TargetLowering::CallLoweringInfo::Chain
SDValue Chain
Definition: TargetLowering.h:4199
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1660
PerformTruncatingStoreCombine
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:16377
llvm::MachineBasicBlock::setIsEHPad
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
Definition: MachineBasicBlock.h:584
llvm::ISD::AssertZext
@ AssertZext
Definition: ISDOpcodes.h:62
llvm::ARMISD::VQDMULH
@ VQDMULH
Definition: ARMISelLowering.h:231
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:198
llvm::ARMISD::VCMPZ
@ VCMPZ
Definition: ARMISelLowering.h:149
llvm::ARMFunctionInfo::branchTargetEnforcement
bool branchTargetEnforcement() const
Definition: ARMMachineFunctionInfo.h:305
llvm::APInt::logBase2
unsigned logBase2() const
Definition: APInt.h:1700
llvm::ARMSubtarget::isMClass
bool isMClass() const
Definition: ARMSubtarget.h:422
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1132
llvm::ARMCC::LO
@ LO
Definition: ARMBaseInfo.h:34
PerformBUILD_VECTORCombine
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
Definition: ARMISelLowering.cpp:15091
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:39
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::ISD::isConstantSplatVector
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
Definition: SelectionDAG.cpp:141
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::ARMISD::RET_FLAG
@ RET_FLAG
Definition: ARMISelLowering.h:77
isConstant
static bool isConstant(const MachineInstr &MI)
Definition: AMDGPUInstructionSelector.cpp:2605
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:31
llvm::ISD::SPONENTRY
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:106
llvm::SDLoc::getDebugLoc
const DebugLoc & getDebugLoc() const
Definition: SelectionDAGNodes.h:1122
Expand64BitShift
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:6625
llvm::ARMISD::BRCOND
@ BRCOND
Definition: ARMISelLowering.h:74
llvm::DenseMap
Definition: DenseMap.h:714
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:795
llvm::SyncScope::ID
uint8_t ID
Definition: LLVMContext.h:46
llvm::CallingConv::ARM_AAPCS
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:108
PerformVMOVhrCombine
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:14997
llvm::ARMISD::VCVTL
@ VCVTL
Definition: ARMISelLowering.h:222
llvm::MachineConstantPool
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
Definition: MachineConstantPool.h:117
llvm::TargetLowering::CallLoweringInfo::CallConv
CallingConv::ID CallConv
Definition: TargetLowering.h:4220
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::ARMISD::UQSUB16b
@ UQSUB16b
Definition: ARMISelLowering.h:288
llvm::DemandedBits
Definition: DemandedBits.h:40
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::ISD::VECREDUCE_FADD
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1276
llvm::TargetLoweringBase::setStackPointerRegisterToSaveRestore
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
Definition: TargetLowering.h:2322
llvm::MachineOperand::isDead
bool isDead() const
Definition: MachineOperand.h:394
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:709
llvm::condCodeOp
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
Definition: ARMBaseInstrInfo.h:550
llvm::ISD::ATOMIC_LOAD_NAND
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1193
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:921
genTPEntry
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
Definition: ARMISelLowering.cpp:11683
llvm::ARMII::MO_NONLAZY
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
llvm::TargetLowering::CW_Register
@ CW_Register
Definition: TargetLowering.h:4640
llvm::SelectionDAG::getLogicalNOT
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
Definition: SelectionDAG.cpp:1500
llvm::EVT::getEVT
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:603
llvm::AtomicOrdering::Unordered
@ Unordered
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:46
I
#define I(x, y, z)
Definition: MD5.cpp:58
LowerReverse_VECTOR_SHUFFLE
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:8478
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1800
llvm::ARMISD::VREV64
@ VREV64
Definition: ARMISelLowering.h:205
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:9202
llvm::ARMBaseInstrInfo
Definition: ARMBaseInstrInfo.h:37
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition: TargetLowering.h:3944
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1496
StringExtras.h
llvm::MachineFrameInfo::computeMaxCallFrameSize
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
Definition: MachineFrameInfo.cpp:187
llvm::ARMISD::VLD2LN_UPD
@ VLD2LN_UPD
Definition: ARMISelLowering.h:335
llvm::ISD::FP_TO_FP16
@ FP_TO_FP16
Definition: ISDOpcodes.h:896
LowerVECTOR_SHUFFLE
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:8736
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1172
MCRegisterInfo.h
llvm::TargetLowering::AsmOperandInfo
This contains information for each constraint that we are lowering.
Definition: TargetLowering.h:4647
llvm::ARMSubtarget::isTargetIOS
bool isTargetIOS() const
Definition: ARMSubtarget.h:362
size
i< reg-> size
Definition: README.txt:166
llvm::ISD::UADDSAT
@ UADDSAT
Definition: ISDOpcodes.h:341
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:1038
llvm::ISD::ATOMIC_LOAD_ADD
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1187
PerformVQDMULHCombine
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:13153
llvm::TargetLowering::CallLoweringInfo::DL
SDLoc DL
Definition: TargetLowering.h:4224
MachineConstantPool.h
llvm::ARMISD::VST4_UPD
@ VST4_UPD
Definition: ARMISelLowering.h:350
llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
llvm::ARMSubtarget::isTargetELF
bool isTargetELF() const
Definition: ARMSubtarget.h:372
llvm::Function::hasStructRetAttr
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:625
llvm::ARMISD::VMOVRRD
@ VMOVRRD
Definition: ARMISelLowering.h:115
llvm::ARMISD::COPY_STRUCT_BYVAL
@ COPY_STRUCT_BYVAL
Definition: ARMISelLowering.h:67
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:446
llvm::SelectionDAG::getSExtOrTrunc
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition: SelectionDAG.cpp:1440
llvm::MemSDNode::getOriginalAlign
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
Definition: SelectionDAGNodes.h:1292
PerformUMLALCombine
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:12959
ArrayRef.h
llvm::SDValue::getScalarValueSizeInBits
uint64_t getScalarValueSizeInBits() const
Definition: SelectionDAGNodes.h:203
llvm::SelectionDAG::getAnyExtOrTrunc
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
Definition: SelectionDAG.cpp:1434
llvm::EVT::getVectorMinNumElements
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:331
llvm::ARMISD::THREAD_POINTER
@ THREAD_POINTER
Definition: ARMISelLowering.h:125
BaseUpdateUser::N
SDNode * N
Instruction that updates a pointer.
Definition: ARMISelLowering.cpp:15602
llvm::TargetLowering::expandDIVREMByConstant
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
Definition: TargetLowering.cpp:7284
ShuffleOpCodes
ShuffleOpCodes
Definition: ARMISelLowering.cpp:8310
llvm::ARMTargetLowering::isShuffleMaskLegal
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
Definition: ARMISelLowering.cpp:8346
findMUL_LOHI
static SDValue findMUL_LOHI(SDValue V)
Definition: ARMISelLowering.cpp:12654
OP_VTRNR
@ OP_VTRNR
Definition: ARMISelLowering.cpp:8325
llvm::ARMISD::STRD
@ STRD
Definition: ARMISelLowering.h:360
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
BaseUpdateTarget
Load/store instruction that can be merged with a base address update.
Definition: ARMISelLowering.cpp:15593
llvm::SelectionDAG::getAllOnesConstant
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:656
llvm::ARMFunctionInfo::setReturnRegsCount
void setReturnRegsCount(unsigned s)
Definition: ARMMachineFunctionInfo.h:183
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:170
llvm::ARMISD::VST1x4_UPD
@ VST1x4_UPD
Definition: ARMISelLowering.h:356
llvm::ARMCC::LS
@ LS
Definition: ARMBaseInfo.h:40
llvm::ARMTargetLowering::canCombineStoreAndExtract
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
Definition: ARMISelLowering.cpp:21260
checkAndUpdateCPSRKill
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
Definition: ARMISelLowering.cpp:11654
llvm::ARMSubtarget::isTargetCOFF
bool isTargetCOFF() const
Definition: ARMSubtarget.h:371
llvm::shuffle
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1577
llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
llvm::TargetMachine::getTLSModel
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
Definition: TargetMachine.cpp:154
ExpandREAD_REGISTER
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:6146
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:130
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:911
llvm::ISD::MSTORE
@ MSTORE
Definition: ISDOpcodes.h:1212
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2376
llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition: MachineFrameInfo.h:639
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1414
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:179
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:119
llvm::ARMSubtarget::isROPI
bool isROPI() const
Definition: ARMSubtarget.cpp:343
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1438
llvm::MachineFunction::getConstantPool
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Definition: MachineFunction.h:704
llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition: TargetLowering.h:4198
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition: SelectionDAG.cpp:1964
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
llvm::SelectionDAG::getMaskedLoad
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
Definition: SelectionDAG.cpp:8909
PerformHWLoopCombine
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:17996
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
llvm::ARMISD::VADDLVApu
@ VADDLVApu
Definition: ARMISelLowering.h:245
AddRequiredExtensionForVMULL
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
Definition: ARMISelLowering.cpp:9422
llvm::ISD::MULHS
@ MULHS
Definition: ISDOpcodes.h:638
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition: MachineFrameInfo.cpp:83
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:925
llvm::CCState::getInRegsParamsCount
unsigned getInRegsParamsCount() const
Definition: CallingConvLower.h:431
llvm::ARMISD::VMINVu
@ VMINVu
Definition: ARMISelLowering.h:258
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:688
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1448
llvm::ISD::DEBUGTRAP
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1135
isVTRNMask
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition: ARMISelLowering.cpp:7279
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1601
llvm::AtomicRMWInst::isFloatingPointOperation
bool isFloatingPointOperation() const
Definition: Instructions.h:889
llvm::SDValue::getResNo
unsigned getResNo() const
get the index which selects a specific result in the SDNode
Definition: SelectionDAGNodes.h:156
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:265
llvm::ARMISD::VMLALVps
@ VMLALVps
Definition: ARMISelLowering.h:252
llvm::Instruction::hasAtomicStore
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
Definition: Instruction.cpp:693
llvm::MaskedLoadSDNode
This class is used to represent an MLOAD node.
Definition: SelectionDAGNodes.h:2658
llvm::TargetLoweringBase::AtomicExpansionKind::LLSC
@ LLSC
llvm::ARMISD::VSHLs
@ VSHLs
Definition: ARMISelLowering.h:153
ARMBaseInstrInfo.h
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:202
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:140
llvm::ARMTargetLowering::findRepresentativeClass
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
Definition: ARMISelLowering.cpp:1629
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1206
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:62
llvm::ARMTargetLowering::shouldInsertFencesForAtomic
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Definition: ARMISelLowering.cpp:21220
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1434
SkipLoadExtensionForVMULL
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
Definition: ARMISelLowering.cpp:9444
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1523
llvm::gettBLXrOpcode
unsigned gettBLXrOpcode(const MachineFunction &MF)
Definition: ARMBaseInstrInfo.cpp:6737
LowerINSERT_VECTOR_ELT_i1
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:8964
llvm::ARMISD::VECTOR_REG_CAST
@ VECTOR_REG_CAST
Definition: ARMISelLowering.h:142
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9946
llvm::ARMISD::VRSHRsIMM
@ VRSHRsIMM
Definition: ARMISelLowering.h:162
isAddSubZExt
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:9530
llvm::Length
@ Length
Definition: DWP.cpp:406
llvm::ISD::VECREDUCE_AND
@ VECREDUCE_AND
Definition: ISDOpcodes.h:1286
llvm::ARMSubtarget::allowsUnalignedMem
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:457
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition: SelectionDAG.cpp:2250
llvm::MVT::getFloatingPointVT
static MVT getFloatingPointVT(unsigned BitWidth)
Definition: MachineValueType.h:1229
llvm::LegalizeActions::LegalizeAction
LegalizeAction
Definition: LegalizerInfo.h:43
llvm::ISD::RETURNADDR
@ RETURNADDR
Definition: ISDOpcodes.h:95
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:3056
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::ARMISD::VADDLVpu
@ VADDLVpu
Definition: ARMISelLowering.h:243
llvm::ARCISD::CMOV
@ CMOV
Definition: ARCISelLowering.h:43
llvm::ARMISD::VST3LN_UPD
@ VST3LN_UPD
Definition: ARMISelLowering.h:352
hasNormalLoadOperand
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
Definition: ARMISelLowering.cpp:15079
PerformSplittingMVEEXTToWideningLoad
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:18497
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition: MachineInstrBuilder.h:123
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:369
llvm::ARMISD::VMOVFPIMM
@ VMOVFPIMM
Definition: ARMISelLowering.h:193
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:304
isTruncMask
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
Definition: ARMISelLowering.cpp:7516
llvm::ARMVCC::None
@ None
Definition: ARMBaseInfo.h:90
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:9714
llvm::ARMTargetLowering::PerformMVETruncCombine
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
Definition: ARMISelLowering.cpp:18395
llvm::ARMISD::VLD4_UPD
@ VLD4_UPD
Definition: ARMISelLowering.h:334
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
bool empty() const
Definition: DenseMap.h:98
llvm::ARMTargetLowering::emitTrailingFence
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Definition: ARMISelLowering.cpp:21110
llvm::ISD::POST_DEC
@ POST_DEC
Definition: ISDOpcodes.h:1383
llvm::CodeGenOpt::None
@ None
-O0
Definition: CodeGen.h:58
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2397
llvm::ARMSubtarget::useNEONForSinglePrecisionFP
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:328
llvm::ARMRegisterInfo
Definition: ARMRegisterInfo.h:20
PerformVECTOR_REG_CASTCombine
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:15248
llvm::ARMSubtarget::isLittle
bool isLittle() const
Definition: ARMSubtarget.h:463
llvm::SelectionDAG::haveNoCommonBitsSet
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
Definition: SelectionDAG.cpp:5043
llvm::ARMISD::VADDVs
@ VADDVs
Definition: ARMISelLowering.h:234
PerformVSELECTCombine
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:13261
llvm::SelectionDAG::CreateStackTemporary
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
Definition: SelectionDAG.cpp:2368
llvm::ARMBaseRegisterInfo::getTLSCallPreservedMask
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
Definition: ARMBaseRegisterInfo.cpp:159
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:750
llvm::KnownBits::computeForAddSub
static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS, KnownBits RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:57
OtherSucc
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
Definition: ARMISelLowering.cpp:11169
isStore
static bool isStore(int Opcode)
Definition: ARCInstrInfo.cpp:58
llvm::ISD::VASTART
@ VASTART
Definition: ISDOpcodes.h:1086
llvm::MachineInstr::readsRegister
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Definition: MachineInstr.h:1398
llvm::ARMISD::VTBL2
@ VTBL2
Definition: ARMISelLowering.h:212
llvm::ARMISD::QADD16b
@ QADD16b
Definition: ARMISelLowering.h:283
llvm::TargetOptions::EnableFastISel
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
Definition: TargetOptions.h:232
llvm::TargetLoweringBase::getMaxSupportedInterleaveFactor
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
Definition: TargetLowering.h:2897
llvm::MachinePointerInfo::getWithOffset
MachinePointerInfo getWithOffset(int64_t O) const
Definition: MachineMemOperand.h:79
llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:268
info
lazy value info
Definition: LazyValueInfo.cpp:58
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::MVT::bf16
@ bf16
Definition: MachineValueType.h:55
combineSelectAndUse
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
Definition: ARMISelLowering.cpp:12425
llvm::ARMFunctionInfo::getGlobalsPromotedToConstantPool
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
Definition: ARMMachineFunctionInfo.h:277
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::TargetLowering::CW_Default
@ CW_Default
Definition: TargetLowering.h:4643
llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition: AddressRanges.h:153
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:258
getType
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
Definition: M68kELFObjectWriter.cpp:48
llvm::SelectionDAG::getCALLSEQ_END
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:1034
llvm::CCState::resultsCompatible
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
Definition: CallingConvLower.cpp:258
llvm::isAcquireOrStronger
bool isAcquireOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:128
llvm::ARMISD::VREV32
@ VREV32
Definition: ARMISelLowering.h:206
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:694
Triple.h
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition: MachineValueType.h:1269
llvm::ARM_AM::getFP64Imm
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
Definition: ARMAddressingModes.h:720
llvm::TargetLowering::expandABS
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
Definition: TargetLowering.cpp:8566
llvm::MachineBasicBlock::moveAfter
void moveAfter(MachineBasicBlock *NewBefore)
Definition: MachineBasicBlock.cpp:663
llvm::TargetLowering::CallLoweringInfo::Ins
SmallVector< ISD::InputArg, 32 > Ins
Definition: TargetLowering.h:4228
llvm::ARMISD::VLD3_UPD
@ VLD3_UPD
Definition: ARMISelLowering.h:333
getDebugLoc
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Definition: MachineInstrBundle.cpp:110
llvm::ISD::ConstantPool
@ ConstantPool
Definition: ISDOpcodes.h:82
TargetOptions.h
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:956
llvm::ISD::GlobalTLSAddress
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
llvm::ARM_AM::sub
@ sub
Definition: ARMAddressingModes.h:38
llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition: CallingConvLower.h:117
llvm::ARMISD::VSRIIMM
@ VSRIIMM
Definition: ARMISelLowering.h:181
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1323
isLegalT1AddressImmediate
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
Definition: ARMISelLowering.cpp:19287
llvm::ARMISD::VADDVu
@ VADDVu
Definition: ARMISelLowering.h:235
llvm::BlockAddress
The address of a basic block.
Definition: Constants.h:875
canGuaranteeTCO
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Definition: ARMISelLowering.cpp:2315
llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition: MachineInstrBuilder.h:197
llvm::TargetLowering::CallLoweringInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:4223
llvm::ARMTargetLowering::getSDagStackGuard
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: ARMISelLowering.cpp:21246
llvm::ARMCC::GT
@ GT
Definition: ARMBaseInfo.h:43
llvm::MachineOperand::setIsDef
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Definition: MachineOperand.cpp:107
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::MVT::fixedlen_vector_valuetypes
static auto fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1542
ConstpoolPromotionMaxTotal
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
llvm::ARMISD::MVEZEXT
@ MVEZEXT
Definition: ARMISelLowering.h:145
llvm::ISD::isSEXTLoad
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
Definition: SelectionDAGNodes.h:3068
llvm::ArrayRef< int >
llvm::ARMTargetLowering::createComplexDeinterleavingIR
Value * createComplexDeinterleavingIR(Instruction *I, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
Definition: ARMISelLowering.cpp:21950
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
llvm::ISD::isNormalLoad
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Definition: SelectionDAGNodes.h:3049
llvm::ISD::UMAX
@ UMAX
Definition: ISDOpcodes.h:663
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1383
llvm::ConstantPoolSDNode
Definition: SelectionDAGNodes.h:1887
ARMAddressingModes.h
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:1196
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::CCState::AllocateReg
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
Definition: CallingConvLower.h:328
llvm::ARMSubtarget::getRegisterInfo
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:274
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
LowerTruncate
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:9234
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
llvm::BuildVectorSDNode::isConstantSplat
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
Definition: SelectionDAG.cpp:11740
llvm::SDNode::use_end
static use_iterator use_end()
Definition: SelectionDAGNodes.h:804
llvm::ARMISD::VGETLANEu
@ VGETLANEu
Definition: ARMISelLowering.h:185
DataLayout.h
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
isSingletonVEXTMask
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
Definition: ARMISelLowering.cpp:7180
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:396
llvm::MachineBasicBlock::isEHPad
bool isEHPad() const
Returns true if the block is a landing pad.
Definition: MachineBasicBlock.h:580
llvm::ARMISD::VMLAVps
@ VMLAVps
Definition: ARMISelLowering.h:248
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:110
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::ARMISD::FIRST_NUMBER
@ FIRST_NUMBER
Definition: ARMISelLowering.h:58
llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition: TargetLowering.cpp:9440
llvm::KnownBits::zext
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:163
llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition: TargetLowering.cpp:5232
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:352
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:1037
llvm::RTLIB::getFPTOSINT
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition: TargetLoweringBase.cpp:308
llvm::ARMISD::VST1x2_UPD
@ VST1x2_UPD
Definition: ARMISelLowering.h:354
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::BuildVectorSDNode
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Definition: SelectionDAGNodes.h:1992
llvm::ARMISD::VADDVps
@ VADDVps
Definition: ARMISelLowering.h:236
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::ARMTargetLowering::shouldExpandAtomicCmpXchgInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: ARMISelLowering.cpp:21200
llvm::MachineInstrBuilder::addJumpTableIndex
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:171
llvm::ShuffleVectorSDNode::getMaskElt
int getMaskElt(unsigned Idx) const
Definition: SelectionDAGNodes.h:1545
llvm::ISD::ABDU
@ ABDU
Definition: ISDOpcodes.h:656
llvm::Offset
@ Offset
Definition: DWP.cpp:406
PerformLongShiftCombine
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:17240
llvm::isReleaseOrStronger
bool isReleaseOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:132
llvm::Sched::Hybrid
@ Hybrid
Definition: TargetLowering.h:102
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ARMISD::VLD1x4_UPD
@ VLD1x4_UPD
Definition: ARMISelLowering.h:344
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG
bool isAfterLegalizeDAG() const
Definition: TargetLowering.h:3946
isZeroOrAllOnes
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
Definition: ARMISelLowering.cpp:12336
llvm::ConstantDataArray::get
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:691
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1551
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:10541
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:168
llvm::ISD::ATOMIC_LOAD_SUB
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1188
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2596
llvm::ARMTargetLowering::isVectorLoadExtDesirable
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
Definition: ARMISelLowering.cpp:19209
uint32_t
llvm::ARMISD::BUILD_VECTOR
@ BUILD_VECTOR
Definition: ARMISelLowering.h:295
Compiler.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2014
LowerVECTOR_SHUFFLEUsingOneOff
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:8685
llvm::ARMFunctionInfo::shouldSignReturnAddress
bool shouldSignReturnAddress() const
Definition: ARMMachineFunctionInfo.h:293
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
llvm::TargetLoweringBase::MaxStoresPerMemmoveOptSize
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3488
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1149
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
llvm::ARMISD::VLD1DUP_UPD
@ VLD1DUP_UPD
Definition: ARMISelLowering.h:338
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ARMTargetLowering::getOptimalMemOpType
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
Definition: ARMISelLowering.cpp:18969
llvm::ARMFunctionInfo::setArgumentStackSize
void setArgumentStackSize(unsigned size)
Definition: ARMMachineFunctionInfo.h:223
llvm::ARMISD::VMOVhr
@ VMOVhr
Definition: ARMISelLowering.h:197
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:384
Unsigned
@ Unsigned
Definition: NVPTXISelLowering.cpp:4885
llvm::TargetLowering::DAGCombinerInfo::isCalledByLegalizer
bool isCalledByLegalizer() const
Definition: TargetLowering.h:3948
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantSDNode::getSExtValue
int64_t getSExtValue() const
Definition: SelectionDAGNodes.h:1602
llvm::ARMISD::CMPFPE
@ CMPFPE
Definition: ARMISelLowering.h:91
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:921
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition: SelectionDAGNodes.h:1185
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::HexagonInstrInfo::isLoadFromStackSlot
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Definition: HexagonInstrInfo.cpp:288
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::ARMSubtarget::useFPVFMx64
bool useFPVFMx64() const
Definition: ARMSubtarget.h:347
llvm::ISD::BUILTIN_OP_END
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1311
llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition: TargetLowering.cpp:5315
LowerUDIV
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:9722
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2412
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:582
ConvertBooleanCarryToCarryFlag
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:4988
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:112
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:922
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:441
llvm::ARMISD::VRSHRNIMM
@ VRSHRNIMM
Definition: ARMISelLowering.h:164
canChangeToInt
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
Definition: ARMISelLowering.cpp:5566
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
llvm::MachineFunctionProperties::reset
MachineFunctionProperties & reset(Property P)
Definition: MachineFunction.h:202
llvm::ISD::STRICT_FP_EXTEND
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:469
llvm::ARMISD::MVETRUNC
@ MVETRUNC
Definition: ARMISelLowering.h:146
llvm::TargetLoweringBase::setCmpLibcallCC
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
Definition: TargetLowering.h:3183
llvm::ARMISD::VST2_UPD
@ VST2_UPD
Definition: ARMISelLowering.h:348
llvm::TargetLoweringBase::setMinStackArgumentAlignment
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
Definition: TargetLowering.h:2538
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
llvm::ARMBaseInstrInfo::getRegisterInfo
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:177
llvm::ISD::VECREDUCE_XOR
@ VECREDUCE_XOR
Definition: ISDOpcodes.h:1288
PerformReduceShuffleCombine
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:17130
llvm::CodeModel::Tiny
@ Tiny
Definition: CodeGen.h:31
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::SelectionDAG::getTargetConstantPool
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:733
llvm::CCState::rewindByValRegsInfo
void rewindByValRegsInfo()
Definition: CallingConvLower.h:470
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::MachineMemOperand::MOVolatile
@ MOVolatile
The memory access is volatile.
Definition: MachineMemOperand.h:138
llvm::ARMISD::SMLSLDX
@ SMLSLDX
Definition: ARMISelLowering.h:275
llvm::ISD::ATOMIC_SWAP
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1186
llvm::OtherModImm
@ OtherModImm
Definition: ARMISelLowering.h:992
llvm::ARMISD::VMLAVs
@ VMLAVs
Definition: ARMISelLowering.h:246
llvm::CCState::nextInRegsParam
bool nextInRegsParam()
Definition: CallingConvLower.h:456
getARMIndexedAddressParts
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:19574
llvm::TargetLoweringBase::ShiftLegalizationStrategy
ShiftLegalizationStrategy
Return the preferred strategy to legalize tihs SHIFT instruction, with ExpansionFactor being the recu...
Definition: TargetLowering.h:926
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1414
llvm::Init
Definition: Record.h:282
LowerSETCCCARRY
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:6887
llvm::GlobalValue::isStrongDefinitionForLinker
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:627
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:824
llvm::MVT::v8i64
@ v8i64
Definition: MachineValueType.h:134
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1502
llvm::ComplexDeinterleavingOperation::Shuffle
@ Shuffle
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:718
OP_VEXT3
@ OP_VEXT3
Definition: ARMISelLowering.cpp:8319
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::ARMSubtarget::isTargetWindows
bool isTargetWindows() const
Definition: ARMSubtarget.h:369
llvm::ARMTargetLowering::isTruncateFree
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition: ARMISelLowering.cpp:18997
llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition: TargetLowering.h:4622
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:668
llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition: TargetLowering.h:323
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:408
isSRL16
static bool isSRL16(const SDValue &Op)
Definition: ARMISelLowering.cpp:1976
CanInvertMVEVCMP
static bool CanInvertMVEVCMP(SDValue N)
Definition: ARMISelLowering.cpp:14509
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2367
llvm::SelectionDAG::getTargetJumpTable
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:727
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1762
llvm::EABI::EABI5
@ EABI5
PerformARMBUILD_VECTORCombine
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
Definition: ARMISelLowering.cpp:15124
llvm::ISD::FRAMEADDR
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
LowerInterruptReturn
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:3125
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:121
llvm::ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
Definition: ARMISelLowering.cpp:21839
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
llvm::ARMBaseRegisterInfo
Definition: ARMBaseRegisterInfo.h:127
llvm::AtomicOrdering::Release
@ Release
llvm::ARMISD::VSLIIMM
@ VSLIIMM
Definition: ARMISelLowering.h:180
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:912
HA_VECT128
@ HA_VECT128
Definition: ARMISelLowering.cpp:21770
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
llvm::ARMFunctionInfo::setPromotedConstpoolIncrease
void setPromotedConstpoolIncrease(int Sz)
Definition: ARMMachineFunctionInfo.h:283
llvm::ISD::INSERT_SUBVECTOR
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:558
CallingConv.h
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
llvm::ISD::READ_REGISTER
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:118
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:358
llvm::ARMISD::CSNEG
@ CSNEG
Definition: ARMISelLowering.h:321
BaseUpdateTarget::isStore
bool isStore
Definition: ARMISelLowering.cpp:15596
Attributes.h
llvm::ARMTargetLowering::getSingleConstraintMatchWeight
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
Definition: ARMISelLowering.cpp:20192
llvm::TargetLowering::CallLoweringInfo::IsTailCall
bool IsTailCall
Definition: TargetLowering.h:4214
llvm::ARMISD::UQSUB8b
@ UQSUB8b
Definition: ARMISelLowering.h:286
llvm::TargetLoweringBase::getSSPStackGuardCheck
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Definition: TargetLoweringBase.cpp:1991
llvm::SDNode::isOnlyUserOf
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
Definition: SelectionDAG.cpp:11228
j
return j(j<< 16)
llvm::ARMISD::VMLALVAps
@ VMLALVAps
Definition: ARMISelLowering.h:256
llvm::EVT::getHalfNumVectorElementsVT
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:420
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1457
llvm::ARMISD::VLD1x2_UPD
@ VLD1x2_UPD
Definition: ARMISelLowering.h:342
llvm::MVT::v8bf16
@ v8bf16
Definition: MachineValueType.h:161
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:10927
Constant.h
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:124
llvm::APInt::countLeadingZeros
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1552
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2597
llvm::CCState::getFirstUnallocated
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
Definition: CallingConvLower.h:313
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:121
llvm::ARMCC::HS
@ HS
Definition: ARMBaseInfo.h:33
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:248
llvm::TargetLoweringBase::IntrinsicInfo
Definition: TargetLowering.h:1051
emitPostSt
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
Definition: ARMISelLowering.cpp:11257
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
PerformVDUPLANECombine
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
Definition: ARMISelLowering.cpp:16280
llvm::ARMISD::VST1x3_UPD
@ VST1x3_UPD
Definition: ARMISelLowering.h:355
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1948
PerformSELECTCombine
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:13042
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:943
ARMRegisterInfo.h
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:242
llvm::ARMISD::VSHLIMM
@ VSHLIMM
Definition: ARMISelLowering.h:157
LowerVASTART
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:4289
llvm::ARM_AM::decodeVMOVModImm
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
Definition: ARMAddressingModes.h:544
llvm::ARMCC::LE
@ LE
Definition: ARMBaseInfo.h:44
Insn
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Definition: AArch64MIPeepholeOpt.cpp:129
HA_VECT64
@ HA_VECT64
Definition: ARMISelLowering.cpp:21769
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::ARMTargetLowering::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
Definition: ARMISelLowering.cpp:19522
llvm::ARMCC::PL
@ PL
Definition: ARMBaseInfo.h:36
llvm::ARMISD::SMULWB
@ SMULWB
Definition: ARMISelLowering.h:263
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2958
llvm::APInt::trunc
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:898
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2390
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1763
llvm::KnownBits
Definition: KnownBits.h:23
llvm::RTLIB::getFPEXT
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition: TargetLoweringBase.cpp:233
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:638
llvm::ARMSubtarget::isTargetDarwin
bool isTargetDarwin() const
Definition: ARMSubtarget.h:361
llvm::ARMISD::VGETLANEs
@ VGETLANEs
Definition: ARMISelLowering.h:186
LowerSDIV
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:9686
llvm::MachineFunctionProperties::Property::NoPHIs
@ NoPHIs
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:572
llvm::ISD::FP_TO_SINT_SAT
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:838
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
llvm::SDNode::getNumOperands
unsigned getNumOperands() const
Return the number of values used by this operation.
Definition: SelectionDAGNodes.h:908
llvm::SelectionDAG::UnrollVectorOp
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
Definition: SelectionDAG.cpp:11439
llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition: TargetLowering.h:250
llvm::ARMISD::SRA_FLAG
@ SRA_FLAG
Definition: ARMISelLowering.h:106
llvm::ISD::isEXTLoad
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
Definition: SelectionDAGNodes.h:3062
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2599
CallingConvLower.h
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:668
llvm::TargetLowering::CallLoweringInfo::setZExtResult
CallLoweringInfo & setZExtResult(bool Value=true)
Definition: TargetLowering.h:4333
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:10917
llvm::ARMTargetLowering::makeDMB
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
Definition: ARMISelLowering.cpp:21055
llvm::ISD::VECREDUCE_MUL
@ VECREDUCE_MUL
Definition: ISDOpcodes.h:1285
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:295
llvm::ARMISD::VMAXVu
@ VMAXVu
Definition: ARMISelLowering.h:260
llvm::ARMConstantPoolConstant::Create
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
Definition: ARMConstantPoolValue.cpp:148
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:357
llvm::ISD::BR
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:981
LowerBUILD_VECTOR_i1
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:7702
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::ARMISD::VQMOVNs
@ VQMOVNs
Definition: ARMISelLowering.h:216
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::ARMISD::VLD4DUP_UPD
@ VLD4DUP_UPD
Definition: ARMISelLowering.h:341
checkVSELConstraints
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
Definition: ARMISelLowering.cpp:5172
llvm::ARMTargetLowering::getSubtarget
const ARMSubtarget * getSubtarget() const
Definition: ARMISelLowering.h:566
llvm::DataLayout::getPrivateGlobalPrefix
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:328
llvm::ARMSubtarget::isAPCS_ABI
bool isAPCS_ABI() const
Definition: ARMSubtarget.cpp:329
llvm::ARMISD::VADDVpu
@ VADDVpu
Definition: ARMISelLowering.h:237
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:915
llvm::ARMTargetLowering::PerformBRCONDCombine
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
Definition: ARMISelLowering.cpp:18124
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:550
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:923
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:559
llvm::TargetLowering::ConstraintWeight
ConstraintWeight
Definition: TargetLowering.h:4630
llvm::ARMISD::VMLALVu
@ VMLALVu
Definition: ARMISelLowering.h:251
GlobalVariable.h
llvm::TargetLowering::CallLoweringInfo::setDiscardResult
CallLoweringInfo & setDiscardResult(bool Value=true)
Definition: TargetLowering.h:4318
llvm::MachineJumpTableInfo::EK_Inline
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
Definition: MachineJumpTableInfo.h:72
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:914
llvm::CallingConv::SwiftTail
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
llvm::MachineInstrBuilder::addConstantPoolIndex
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:158
llvm::RetCC_ARM_AAPCS
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:468
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:101
ISDOpcodes.h
llvm::CallingConv::ARM_AAPCS_VFP
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:111
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
Casting.h
PerformVDUPCombine
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
Definition: ARMISelLowering.cpp:16323
llvm::AArch64ISD::CSINC
@ CSINC
Definition: AArch64ISelLowering.h:85
llvm::pdb::PDB_LocType::Slot
@ Slot
llvm::Triple::MachO
@ MachO
Definition: Triple.h:286
llvm::ARMISD::UMAAL
@ UMAAL
Definition: ARMISelLowering.h:267
llvm::ARMCP::CPLSDA
@ CPLSDA
Definition: ARMConstantPoolValue.h:41
Function.h
isAddSubSExt
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:9519
LowerSDIV_v4i16
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:9647
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:201
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:10064
llvm::ARMSubtarget::isTargetMachO
bool isTargetMachO() const
Definition: ARMSubtarget.h:373
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetLowering::DAGCombinerInfo::AddToWorklist
void AddToWorklist(SDNode *N)
Definition: DAGCombiner.cpp:977
llvm::ARMISD::VDUPLANE
@ VDUPLANE
Definition: ARMISelLowering.h:201
llvm::ARMISD::tSECALL
@ tSECALL
Definition: ARMISelLowering.h:72
llvm::ARMISD::CMN
@ CMN
Definition: ARMISelLowering.h:88
llvm::ARM_AM::getFP16Imm
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
Definition: ARMAddressingModes.h:654
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::size
unsigned size() const
Definition: DenseMap.h:99
isGTorGE
static bool isGTorGE(ISD::CondCode CC)
Definition: ARMISelLowering.cpp:5248
llvm::SelectionDAG::getTargetExternalSymbol
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1915
llvm::ARMISD::VMLALVpu
@ VMLALVpu
Definition: ARMISelLowering.h:253
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::CCState::getNextStackOffset
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Definition: CallingConvLower.h:241
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ARMSubtarget::useFPVFMx16
bool useFPVFMx16() const
Definition: ARMSubtarget.h:346
llvm::ISD::VECREDUCE_OR
@ VECREDUCE_OR
Definition: ISDOpcodes.h:1287
PerformVLDCombine
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:16067
llvm::X86II::ImmMask
@ ImmMask
Definition: X86BaseInfo.h:849
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1444
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:776
llvm::ARMSubtarget::isAAPCS_ABI
bool isAAPCS_ABI() const
Definition: ARMSubtarget.cpp:333
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:234
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
llvm::ARMISD::INTRET_FLAG
@ INTRET_FLAG
Definition: ARMISelLowering.h:79
llvm::ARMTargetLowering::emitLeadingFence
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
Definition: ARMISelLowering.cpp:21085
OP_VDUP1
@ OP_VDUP1
Definition: ARMISelLowering.cpp:8314
llvm::ARMISD::ADDE
@ ADDE
Definition: ARMISelLowering.h:110
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:145
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::fltNanEncoding::AllOnes
@ AllOnes
llvm::ARM_MB::ISHST
@ ISHST
Definition: ARMBaseInfo.h:69
llvm::ARM_MB::ISH
@ ISH
Definition: ARMBaseInfo.h:70
isZeroExtended
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
isZeroExtended - Check if a node is a vector value that is zero-extended (or any-extended) or a const...
Definition: ARMISelLowering.cpp:9393
llvm::ARMTargetLowering::allowTruncateForTailCall
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
Definition: ARMISelLowering.cpp:19239
llvm::ARMISD::CSINV
@ CSINV
Definition: ARMISelLowering.h:320
llvm::SDNode::getNumValues
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
Definition: SelectionDAGNodes.h:983
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:174
PerformMVEVMULLCombine
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:13992
llvm::RTLIB::getFPTOUINT
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition: TargetLoweringBase.cpp:357
PerformVSetCCToVCTPCombine
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:13301
llvm::TLSModel::Model
Model
Definition: CodeGen.h:45
llvm::APInt::getSplat
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:612
llvm::EVT::getFloatingPointVT
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
llvm::TargetLowering::SimplifyDemandedVectorElts
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
Definition: TargetLowering.cpp:2814
llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition: TargetLowering.h:233
StringSwitch.h
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition: SelectionDAGNodes.h:379
llvm::countr_one
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:271
llvm::TargetLoweringBase::getSchedulingPreference
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
Definition: TargetLowering.h:878
llvm::countl_zero
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:245
llvm::TargetLowering::CW_Invalid
@ CW_Invalid
Definition: TargetLowering.h:4632
llvm::ARMCC::CondCodes
CondCodes
Definition: ARMBaseInfo.h:30
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:186
llvm::TargetLoweringBase::setLibcallName
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
Definition: TargetLowering.h:3168
IntCCToARMCC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
Definition: ARMISelLowering.cpp:2011
OP_VUZPR
@ OP_VUZPR
Definition: ARMISelLowering.cpp:8321
llvm::Type::dump
void dump() const
Definition: AsmWriter.cpp:4946
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:966
llvm::ISD::VACOPY
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1081
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1399
llvm::ARM_AM::getSORegOpc
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
Definition: ARMAddressingModes.h:98
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:116
llvm::TargetLowering::CallLoweringInfo::NoMerge
bool NoMerge
Definition: TargetLowering.h:4210
llvm::Function::arg_begin
arg_iterator arg_begin()
Definition: Function.h:766
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
llvm::ARMTargetLowering::getABIAlignmentForCallingConv
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
Definition: ARMISelLowering.cpp:21825
llvm::ARMSubtarget::hasVFP4Base
bool hasVFP4Base() const
Definition: ARMSubtarget.h:334
llvm::CallingConv::ARM_APCS
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:104
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:751
GlobalAlias.h
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:449
HA_UNKNOWN
@ HA_UNKNOWN
Definition: ARMISelLowering.cpp:21766
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::ISD::SSUBO
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
llvm::TargetLoweringBase::TypePromoteFloat
@ TypePromoteFloat
Definition: TargetLowering.h:215
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1177
llvm::codeview::ModifierOptions::Const
@ Const
llvm::ARMISD::VQSHRNsIMM
@ VQSHRNsIMM
Definition: ARMISelLowering.h:170
llvm::MemSDNode::isSimple
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Definition: SelectionDAGNodes.h:1352
llvm::ARMFunctionInfo::getArgumentStackSize
unsigned getArgumentStackSize() const
Definition: ARMMachineFunctionInfo.h:222
CombineBaseUpdate
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
Definition: ARMISelLowering.cpp:15978
llvm::ARMCP::GOTTPOFF
@ GOTTPOFF
Global Offset Table, PC Relative.
Definition: ARMConstantPoolValue.h:50
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::ARMTargetLowering::isFPImmLegal
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
Definition: ARMISelLowering.cpp:20787
CodeGen.h
PerformShiftCombine
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
Definition: ARMISelLowering.cpp:17496
llvm::ConstantMaterializationCost
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
Definition: ARMBaseInstrInfo.cpp:5628
llvm::TLSModel::InitialExec
@ InitialExec
Definition: CodeGen.h:48
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:31
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::ARMISD::VQSHLsIMM
@ VQSHLsIMM
Definition: ARMISelLowering.h:167
llvm::ARMTargetLowering::getMaxSupportedInterleaveFactor
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
Definition: ARMISelLowering.cpp:21451
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1293
llvm::Function::isVarArg
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:187
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2290
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1439
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:870
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2017
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:106
llvm::countr_zero
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:179
llvm::ISD::FGETSIGN
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:496
ARMISelLowering.h
isExtendedBUILD_VECTOR
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each element has been zero/sign-...
Definition: ARMISelLowering.cpp:9330
llvm::CombineLevel
CombineLevel
Definition: DAGCombine.h:15
isSigned
static bool isSigned(unsigned int Opcode)
Definition: ExpandLargeDivRem.cpp:52
llvm::RecurKind::FAdd
@ FAdd
Sum of floats.
llvm::Triple::COFF
@ COFF
Definition: Triple.h:282
Instructions.h
llvm::ISD::RegisterMask
@ RegisterMask
Definition: ISDOpcodes.h:75
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::ISD::PREFETCH
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1145
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
llvm::TargetLoweringBase::AtomicExpansionKind::LLOnly
@ LLOnly
llvm::ARMTargetLowering::shouldConvertFpToSat
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
Definition: ARMISelLowering.cpp:13737
llvm::ISD::READCYCLECOUNTER
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1112
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: TargetMachine.h:134
SmallVector.h
llvm::ARMISD::VLD4DUP
@ VLD4DUP
Definition: ARMISelLowering.h:328
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:394
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1049
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:309
llvm::TargetLowering::parametersInCSRMatch
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
Definition: TargetLowering.cpp:81
User.h
MachineInstrBuilder.h
combineSelectAndUseCommutative
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:12451
llvm::ARMISD::VCVTN
@ VCVTN
Definition: ARMISelLowering.h:220
llvm::ARMTargetLowering::isExtractSubvectorCheap
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
Definition: ARMISelLowering.cpp:21047
PerformAddcSubcCombine
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:12981
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:241
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
LowerBUILD_VECTORToVIDUP
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:7764
llvm::ARMTargetLowering::AdjustInstrPostInstrSelection
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
Definition: ARMISelLowering.cpp:12233
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:199
PerformMVEVLDCombine
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:16075
AddCombineToVPADD
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:12476
llvm::ARMTargetLowering::getSSPStackGuardCheck
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
Definition: ARMISelLowering.cpp:21253
llvm::SDValue::getConstantOperandAPInt
const APInt & getConstantOperandAPInt(unsigned i) const
Definition: SelectionDAGNodes.h:1157
llvm::KnownBits::mul
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:415
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:118
llvm::ARMTargetLowering::CCAssignFnForReturn
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
Definition: ARMISelLowering.cpp:2109
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:56
llvm::SDNode::hasPredecessorHelper
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
Definition: SelectionDAGNodes.h:849
llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition: MachineInstrBuilder.h:273
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1285
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:61
llvm::APInt::countTrailingOnes
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1607
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:704
llvm::TargetLoweringBase::setMaxAtomicSizeInBitsSupported
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Definition: TargetLowering.h:2546
llvm::ISD::LAST_INDEXED_MODE
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1385
llvm::ARMISD::MEMBARRIER_MCR
@ MEMBARRIER_MCR
Definition: ARMISelLowering.h:129
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:90
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::ARMISD::VMLAVpu
@ VMLAVpu
Definition: ARMISelLowering.h:249
llvm::ARMTargetLowering::getNumInterleavedAccesses
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Definition: ARMISelLowering.cpp:21410
llvm::ARMTargetLowering::isFNegFree
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
Definition: ARMISelLowering.cpp:19035
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::ComplexDeinterleavingOperation::CAdd
@ CAdd
isS16
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:2004
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:127
llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
CombineANDShift
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:14143
isValidMVECond
static bool isValidMVECond(unsigned CC, bool IsFloat)
Definition: ARMISelLowering.cpp:14483
llvm::KnownBits::commonBits
static KnownBits commonBits(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits common to LHS and RHS.
Definition: KnownBits.h:315
llvm::SelectionDAG::getRegisterMask
SDValue getRegisterMask(const uint32_t *RegMask)
Definition: SelectionDAG.cpp:2162
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::ARMSubtarget::hasMinSize
bool hasMinSize() const
Definition: ARMSubtarget.h:419
llvm::IntegerType::getBitWidth
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
AddCombineTo64bitUMAAL
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:12904
PerformABSCombine
static SDValue PerformABSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:13359
llvm::MVT::getVT
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:571
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
llvm::TargetLowering::DAGCombinerInfo::CombineTo
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
Definition: DAGCombiner.cpp:982
llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:124
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:701
llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition: TargetLowering.h:891
llvm::ARMFunctionInfo::getArgRegsSaveSize
unsigned getArgRegsSaveSize() const
Definition: ARMMachineFunctionInfo.h:179
PerformANDCombine
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:14247
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:292
llvm::GlobalValue::hasDLLImportStorageClass
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:274
llvm::CallingConv::GHC
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2595
ARMTargetTransformInfo.h
llvm::EVT::isPow2VectorType
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:437
llvm::KnownBits::makeConstant
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:310
llvm::ARMSubtarget::isGVIndirectSymbol
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
Definition: ARMSubtarget.cpp:352
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::CC_ARM_APCS
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::TargetLoweringBase::AtomicExpansionKind::None
@ None
llvm::ARMISD::CMPFP
@ CMPFP
Definition: ARMISelLowering.h:90
llvm::ISD::isZEXTLoad
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
Definition: SelectionDAGNodes.h:3074
isLowerSaturatingConditional
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
Definition: ARMISelLowering.cpp:5354
llvm::ISD::UMIN
@ UMIN
Definition: ISDOpcodes.h:662
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::ARMISD::VLD3DUP
@ VLD3DUP
Definition: ARMISelLowering.h:327
AddCombineVUZPToVPADDL
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:12504
llvm::MVT::Untyped
@ Untyped
Definition: MachineValueType.h:286
MachineMemOperand.h
llvm::ARMTargetLowering::getExceptionPointerRegister
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
Definition: ARMISelLowering.cpp:21855
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
areExtractExts
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
Definition: ARMISelLowering.cpp:19055
llvm::ARMISD::TC_RETURN
@ TC_RETURN
Definition: ARMISelLowering.h:123
llvm::ARM_AM::createVMOVModImm
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
Definition: ARMAddressingModes.h:533
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:637
llvm::TargetLowering::CallLoweringInfo::OutVals
SmallVector< SDValue, 32 > OutVals
Definition: TargetLowering.h:4227
MachineOperand.h
RegName
#define RegName(no)
isLegalMVEShuffleOp
static bool isLegalMVEShuffleOp(unsigned PFEntry)
Definition: ARMISelLowering.cpp:8328
llvm::ARMII::MO_SBREL
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
llvm::MachineBasicBlock::transferSuccessorsAndUpdatePHIs
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
Definition: MachineBasicBlock.cpp:911
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1137
llvm::ShuffleVectorSDNode::isSplatMask
static bool isSplatMask(const int *Mask, EVT VT)
Definition: SelectionDAG.cpp:12068
llvm::StringSwitch::Default
R Default(T Value)
Definition: StringSwitch.h:182
llvm::ARMII::MO_GOT
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
llvm::ARM_MB::MemBOpt
MemBOpt
Definition: ARMBaseInfo.h:58
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
llvm::RTLIB::getSINTTOFP
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition: TargetLoweringBase.cpp:406
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:671
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >
DerivedTypes.h
llvm::ARMISD::VSHLu
@ VSHLu
Definition: ARMISelLowering.h:154
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
BaseUpdateUser::ConstInc
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
Definition: ARMISelLowering.cpp:15606
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1442
llvm::ARMTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
Definition: ARMISelLowering.cpp:10563
llvm::TargetLowering::CallLoweringInfo::Callee
SDValue Callee
Definition: TargetLowering.h:4221
llvm::ARMISD::Wrapper
@ Wrapper
Definition: ARMISelLowering.h:60
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:256
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:289
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::ARMISD::SMLALBT
@ SMLALBT
Definition: ARMISelLowering.h:269
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1485
llvm::ARMTargetLowering::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Definition: ARMISelLowering.cpp:19538
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:910
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:465
llvm::SelectionDAG::ComputeNumSignBits
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
Definition: SelectionDAG.cpp:4009
llvm::omp::RTLDependInfoFields::Flags
@ Flags
llvm::predOps
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Definition: ARMBaseInstrInfo.h:542
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ARMCC::VC
@ VC
Definition: ARMBaseInfo.h:38
OP_VEXT2
@ OP_VEXT2
Definition: ARMISelLowering.cpp:8318
llvm::CallingConv::Tail
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
llvm::CCState::getInRegsParamsProcessed
unsigned getInRegsParamsProcessed() const
Definition: CallingConvLower.h:434
llvm::ISD::GET_ROUNDING
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:862
llvm::MCOI::TIED_TO
@ TIED_TO
Definition: MCInstrDesc.h:36
isVTBLMask
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
Definition: ARMISelLowering.cpp:7244
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::ISD::VAARG
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1076
llvm::KnownBits::getBitWidth
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
IsQRMVEInstruction
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
Definition: ARMISelLowering.cpp:7799
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::ARMCC::getOppositeCondition
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
llvm::SelectionDAG::getExternalSymbol
SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition: SelectionDAG.cpp:1898
llvm::ARMSubtarget::isThumb2
bool isThumb2() const
Definition: ARMSubtarget.h:421
llvm::ARMCC::MI
@ MI
Definition: ARMBaseInfo.h:35
llvm::MVEVMVNModImm
@ MVEVMVNModImm
Definition: ARMISelLowering.h:991
llvm::ARMISD::SUBE
@ SUBE
Definition: ARMISelLowering.h:112
llvm::ARMISD::CMOV
@ CMOV
Definition: ARMISelLowering.h:97
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
llvm::ARMSubtarget::getPrefLoopLogAlignment
unsigned getPrefLoopLogAlignment() const
Definition: ARMSubtarget.h:537
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:372
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::Sched::Preference
Preference
Definition: TargetLowering.h:98
getExtensionTo64Bits
static EVT getExtensionTo64Bits(const EVT &OrigVT)
Definition: ARMISelLowering.cpp:9402
llvm::ISD::isNormalStore
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Definition: SelectionDAGNodes.h:3087
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:411
llvm::ARMFunctionInfo::markGlobalAsPromotedToConstantPool
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
Definition: ARMMachineFunctionInfo.h:274
llvm::ISD::ATOMIC_LOAD_XOR
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1192
llvm::EVT::is128BitVector
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:185
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::MVT::fp_valuetypes
static auto fp_valuetypes()
Definition: MachineValueType.h:1531
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1500
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
llvm::MachineJumpTableInfo
Definition: MachineJumpTableInfo.h:42
llvm::ISD::VECREDUCE_SMIN
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1290
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
raw_ostream.h
LowerATOMIC_FENCE
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:4225
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:88
llvm::ISD::VECREDUCE_UMIN
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1292
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:983
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn't worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::ARMISD::BR_JT
@ BR_JT
Definition: ARMISelLowering.h:75
PerformVMOVRRDCombine
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
Definition: ARMISelLowering.cpp:14889
llvm::TargetLoweringBase::finalizeLowering
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
Definition: TargetLoweringBase.cpp:2244
llvm::SDNodeFlags::hasNoSignedZeros
bool hasNoSignedZeros() const
Definition: SelectionDAGNodes.h:437
llvm::ARMISD::UQADD16b
@ UQADD16b
Definition: ARMISelLowering.h:287
MachineFunction.h
CombineVMOVDRRCandidateWithVecOp
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
Definition: ARMISelLowering.cpp:6171
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:924
AddCombineTo64BitSMLAL16
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:12661
model
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from where P can be anything The alignment inference code cannot handle loads from globals in static non mode because it doesn t look through the extra dyld stub load If you try vec_align ll without relocation model
Definition: README-SSE.txt:414
PerformPREDICATE_CASTCombine
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:15215
Reduction
loop Loop Strength Reduction
Definition: LoopStrengthReduce.cpp:7017
llvm::TargetLoweringBase::AtomicExpansionKind::CmpXChg
@ CmpXChg
llvm::ARMISD::CALL
@ CALL
Definition: ARMISelLowering.h:69
llvm::ARMTargetLowering::isLegalT2ScaledAddressingMode
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Definition: ARMISelLowering.cpp:19401
llvm::MachineInstrBundleIterator
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Definition: MachineInstrBundleIterator.h:108
llvm::SelectionDAG::getSplatValue
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
Definition: SelectionDAG.cpp:2863
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:98
llvm::tgtok::TrueVal
@ TrueVal
Definition: TGLexer.h:62
llvm::EVT::getDoubleNumVectorElementsVT
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:430
copy
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing and or getMachineCombinerPatterns Find a clean way to fix the problem which leads to the Shrink Wrapping pass being run after the WebAssembly PEI pass When setting multiple variables to the same we currently get code like const It could be done with a smaller encoding like local tee $pop5 local copy
Definition: README.txt:101
llvm::InstrItineraryData::isEmpty
bool isEmpty() const
Returns true if there are no itineraries.
Definition: MCInstrItineraries.h:126
llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition: SelectionDAGNodes.h:1646
LowerEXTRACT_VECTOR_ELT
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:9045
Value.h
llvm::MCInstrDesc::getOperandConstraint
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
llvm::SelectionDAG::getStackArgumentTokenFactor
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
Definition: SelectionDAG.cpp:6836
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1332
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:523
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:132
HABaseType
HABaseType
Definition: ARMISelLowering.cpp:21765
llvm::TargetLoweringBase::getLibcallCallingConv
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
Definition: TargetLowering.h:3199
llvm::ISD::STACKSAVE
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1052
llvm::TargetLoweringBase::MaxStoresPerMemmove
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Definition: TargetLowering.h:3486
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:918
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:44
getStOpcode
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
Definition: ARMISelLowering.cpp:11197
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:8135
llvm::EVT::isFixedLengthVector
bool isFixedLengthVector() const
Definition: ValueTypes.h:164
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
LowerVecReduce
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition: ARMISelLowering.cpp:10205
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1062
llvm::TargetLoweringBase::setPrefLoopAlignment
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
Definition: TargetLowering.h:2532
llvm::ARMTargetLowering::ARMTargetLowering
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
Definition: ARMISelLowering.cpp:483
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::ISD::ROTR
@ ROTR
Definition: ISDOpcodes.h:695
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition: MachineMemOperand.h:219
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:513
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:469
TargetRegisterInfo.h
llvm::ISD::AVGFLOORS
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:643
Debug.h
PerformVMOVNCombine
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: ARMISelLowering.cpp:17163
llvm::ARMCP::CPValue
@ CPValue
Definition: ARMConstantPoolValue.h:38
llvm::ARMISD::ADDC
@ ADDC
Definition: ARMISelLowering.h:109
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:139
llvm::ARM_AM::lsl
@ lsl
Definition: ARMAddressingModes.h:30
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::SDNode::hasAnyUseOfValue
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
Definition: SelectionDAG.cpp:11217
llvm::ARMSubtarget::isTargetGNUAEABI
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:387
llvm::ISD::SET_ROUNDING
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:867
llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition: MachineInstrBuilder.h:63
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1161
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:482
PerformInsertEltCombine
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
Definition: ARMISelLowering.cpp:15306
llvm::ComplexDeinterleavingOperation::CMulPartial
@ CMulPartial
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:311
llvm::ARMISD::SMLALTB
@ SMLALTB
Definition: ARMISelLowering.h:270
llvm::ARMISD::VLD4LN_UPD
@ VLD4LN_UPD
Definition: ARMISelLowering.h:337
llvm::ARMTargetLowering::useLoadStackGuardNode
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Definition: ARMISelLowering.cpp:21225
LowerADDSUBCARRY
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:9799
llvm::ARMSubtarget::hasAnyDataBarrier
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:337
isLegalAddressImmediate
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
Definition: ARMISelLowering.cpp:19367
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1361
llvm::CCValAssign::needsCustom
bool needsCustom() const
Definition: CallingConvLower.h:124
SearchLoopIntrinsic
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
Definition: ARMISelLowering.cpp:17959
llvm::t1CondCodeOp
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
Definition: ARMBaseInstrInfo.h:557
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
llvm::ARMCP::SBREL
@ SBREL
Section Relative (Windows TLS)
Definition: ARMConstantPoolValue.h:53
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::VMOVModImmType
VMOVModImmType
Definition: ARMISelLowering.h:988
llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition: TargetLowering.cpp:5170
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:365
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
llvm::ARMISD::VORRIMM
@ VORRIMM
Definition: ARMISelLowering.h:301
llvm::ARMSubtarget::hasVFP2Base
bool hasVFP2Base() const
Definition: ARMSubtarget.h:332
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:1167
llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
llvm::TargetLoweringBase::LibCall
@ LibCall
Definition: TargetLowering.h:200
llvm::ARMISD::VMAXVs
@ VMAXVs
Definition: ARMISelLowering.h:261
llvm::ISD::ATOMIC_LOAD_MAX
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1195
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
llvm::MachineFrameInfo::getFunctionContextIndex
int getFunctionContextIndex() const
Return the index for the function context object.
Definition: MachineFrameInfo.h:364
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7882
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition: TargetLowering.h:992
IsSingleInstrConstant
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
Definition: ARMISelLowering.cpp:7685
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::ISD::EXTRACT_ELEMENT
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:163
llvm::ARMCC::LT
@ LT
Definition: ARMBaseInfo.h:42
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
ConstpoolPromotionMaxSize
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
llvm::ISD::isBuildVectorAllZeros
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition: SelectionDAG.cpp:266
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:68
llvm::ARM_AM::getFP32Imm
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
Definition: ARMAddressingModes.h:692
llvm::TLSModel::LocalExec
@ LocalExec
Definition: CodeGen.h:49
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition: MachineValueType.h:1246
llvm::ARMTargetLowering::isLegalInterleavedAccessType
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
Definition: ARMISelLowering.cpp:21415
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
PromoteMVEPredVector
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
Definition: ARMISelLowering.cpp:8512
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809
llvm::ARMISD::VTRN
@ VTRN
Definition: ARMISelLowering.h:210
PerformORCombineToBFI
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition: ARMISelLowering.cpp:14357
llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:500
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:722
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1925
llvm::ARMTargetLowering::getExceptionSelectorRegister
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Definition: ARMISelLowering.cpp:21862
llvm::RetCC_ARM_AAPCS_VFP
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::MVT::integer_fixedlen_vector_valuetypes
static auto integer_fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1554
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:393
llvm::LLT
Definition: LowLevelTypeImpl.h:39
llvm::ARMISD::LSLS
@ LSLS
Definition: ARMISelLowering.h:113
llvm::HasLowerConstantMaterializationCost
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
Definition: ARMBaseInstrInfo.cpp:5661